Coverage Report

Created: 2026-01-06 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/wolfssl-sp-math-all-8bit/wolfcrypt/src/sp_int.c
Line
Count
Source
1
/* sp_int.c
2
 *
3
 * Copyright (C) 2006-2025 wolfSSL Inc.
4
 *
5
 * This file is part of wolfSSL.
6
 *
7
 * wolfSSL is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 3 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * wolfSSL is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20
 */
21
22
/* Implementation by Sean Parkinson. */
23
24
/*
25
DESCRIPTION
26
This library provides single precision (SP) integer math functions.
27
28
*/
29
30
#include <wolfssl/wolfcrypt/libwolfssl_sources.h>
31
32
#if defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)
33
34
#ifdef NO_INLINE
35
    #include <wolfssl/wolfcrypt/misc.h>
36
#else
37
    #define WOLFSSL_MISC_INCLUDED
38
    #include <wolfcrypt/src/misc.c>
39
#endif
40
41
/* SP Build Options:
42
 * WOLFSSL_HAVE_SP_RSA:         Enable SP RSA support
43
 * WOLFSSL_HAVE_SP_DH:          Enable SP DH support
44
 * WOLFSSL_HAVE_SP_ECC:         Enable SP ECC support
45
 * WOLFSSL_SP_MATH:             Use only single precision math and algorithms
46
 *      it supports (no fastmath tfm.c or normal integer.c)
47
 * WOLFSSL_SP_MATH_ALL          Implementation of all MP functions
48
 *      (replacement for tfm.c and integer.c)
49
 * WOLFSSL_SP_SMALL:            Use smaller version of code and avoid large
50
 *      stack variables
51
 * WOLFSSL_SP_NO_MALLOC:        Always use stack, no heap XMALLOC/XFREE allowed
52
 * WOLFSSL_SP_NO_2048:          Disable RSA/DH 2048-bit support
53
 * WOLFSSL_SP_NO_3072:          Disable RSA/DH 3072-bit support
54
 * WOLFSSL_SP_4096:             Enable RSA/DH 4096-bit support
55
 * WOLFSSL_SP_NO_256            Disable ECC 256-bit SECP256R1 support
56
 * WOLFSSL_SP_384               Enable ECC 384-bit SECP384R1 support
57
 * WOLFSSL_SP_521               Enable ECC 521-bit SECP521R1 support
58
 * WOLFSSL_SP_ASM               Enable assembly speedups (detect platform)
59
 * WOLFSSL_SP_X86_64_ASM        Enable Intel x64 assembly implementation
60
 * WOLFSSL_SP_ARM32_ASM         Enable Aarch32 assembly implementation
61
 * WOLFSSL_SP_ARM64_ASM         Enable Aarch64 assembly implementation
62
 * WOLFSSL_SP_ARM_CORTEX_M_ASM  Enable Cortex-M assembly implementation
63
 * WOLFSSL_SP_ARM_THUMB_ASM     Enable ARM Thumb assembly implementation
64
 *      (used with -mthumb)
65
 * WOLFSSL_SP_X86_64            Enable Intel x86 64-bit assembly speedups
66
 * WOLFSSL_SP_X86               Enable Intel x86 assembly speedups
67
 * WOLFSSL_SP_ARM64             Enable Aarch64 assembly speedups
68
 * WOLFSSL_SP_ARM32             Enable ARM32 assembly speedups
69
 * WOLFSSL_SP_ARM32_UDIV        Enable word divide asm that uses UDIV instr
70
 * WOLFSSL_SP_ARM_THUMB         Enable ARM Thumb assembly speedups
71
 *                              (explicitly uses register 'r7')
72
 * WOLFSSL_SP_PPC64             Enable PPC64 assembly speedups
73
 * WOLFSSL_SP_PPC               Enable PPC assembly speedups
74
 * WOLFSSL_SP_MIPS64            Enable MIPS64 assembly speedups
75
 * WOLFSSL_SP_MIPS              Enable MIPS assembly speedups
76
 * WOLFSSL_SP_RISCV64           Enable RISCV64 assembly speedups
77
 * WOLFSSL_SP_RISCV32           Enable RISCV32 assembly speedups
78
 * WOLFSSL_SP_S390X             Enable S390X assembly speedups
79
 * SP_WORD_SIZE                 Force 32 or 64 bit mode
80
 * WOLFSSL_SP_NONBLOCK          Enables "non blocking" mode for SP math, which
81
 *      will return FP_WOULDBLOCK for long operations and function must be
82
 *      called again until complete.
83
 * WOLFSSL_SP_FAST_NCT_EXPTMOD  Enables the faster non-constant time modular
84
 *      exponentiation implementation.
85
 * WOLFSSL_SP_INT_NEGATIVE      Enables negative values to be used.
86
 * WOLFSSL_SP_INT_DIGIT_ALIGN   Enable when unaligned access of sp_int_digit
87
 *                              pointer is not allowed.
88
 * WOLFSSL_SP_NO_DYN_STACK      Disable use of dynamic stack items.
89
 *                              Dynamic arrays used when not small stack.
90
 * WOLFSSL_SP_FAST_MODEXP       Allow fast mod_exp with small C code
91
 * WOLFSSL_SP_LOW_MEM           Use algorithms that use less memory.
92
 */
93
94
/* TODO: WOLFSSL_SP_SMALL is incompatible with clang-12+ -Os. */
95
#if defined(__clang__) && defined(__clang_major__) && \
96
    (__clang_major__ >= 12) && defined(WOLFSSL_SP_SMALL)
97
    #undef WOLFSSL_SP_SMALL
98
#endif
99
100
#include <wolfssl/wolfcrypt/sp_int.h>
101
102
#ifdef WOLFSSL_SP_DYN_STACK
103
/* We are statically declaring a variable smaller than sp_int.
104
 * We track available memory in the 'size' field.
105
 * Disable warnings of sp_int being partly outside array bounds of variable.
106
 */
107
    PRAGMA_GCC_DIAG_PUSH
108
    PRAGMA_GCC("GCC diagnostic ignored \"-Warray-bounds\"")
109
#endif
110
111
#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(WOLFSSL_SP_ASM)
112
    /* force off unneeded vector register save/restore. */
113
    #undef SAVE_VECTOR_REGISTERS
114
    #define SAVE_VECTOR_REGISTERS(fail_clause) SAVE_NO_VECTOR_REGISTERS(fail_clause)
115
    #undef RESTORE_VECTOR_REGISTERS
116
    #define RESTORE_VECTOR_REGISTERS() RESTORE_NO_VECTOR_REGISTERS()
117
#endif
118
119
/* DECL_SP_INT: Declare one variable of type 'sp_int'. */
120
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
121
    !defined(WOLFSSL_SP_NO_MALLOC)
122
    /* Declare a variable that will be assigned a value on XMALLOC. */
123
    #define DECL_SP_INT(n, s)   \
124
5.62M
        sp_int* n = NULL
125
#else
126
    #ifdef WOLFSSL_SP_DYN_STACK
127
        /* Declare a variable on the stack with the required data size. */
128
        #define DECL_SP_INT(n, s)                       \
129
            sp_int_digit n##d[MP_INT_SIZEOF_DIGITS(s)]; \
130
            sp_int* (n) = (sp_int*)n##d
131
    #else
132
        /* Declare a variable on the stack. */
133
        #define DECL_SP_INT(n, s)               \
134
            sp_int n[1]
135
    #endif
136
#endif
137
138
/* ALLOC_SP_INT: Allocate an 'sp_int' of required size. */
139
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
140
    !defined(WOLFSSL_SP_NO_MALLOC)
141
    /* Dynamically allocate just enough data to support size. */
142
    #define ALLOC_SP_INT(n, s, err, h)                                         \
143
5.62M
    do {                                                                       \
144
5.62M
        if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) {                     \
145
10
            (err) = MP_VAL;                                                    \
146
10
        }                                                                      \
147
5.62M
        if ((err) == MP_OKAY) {                                                \
148
5.62M
            (n) = (sp_int*)XMALLOC(MP_INT_SIZEOF(s), (h),                      \
149
5.62M
                DYNAMIC_TYPE_BIGINT);                                          \
150
5.62M
            if ((n) == NULL) {                                                 \
151
1.05k
                (err) = MP_MEM;                                                \
152
1.05k
            }                                                                  \
153
5.62M
        }                                                                      \
154
5.62M
    }                                                                          \
155
5.62M
    while (0)
156
157
    /* Dynamically allocate just enough data to support size - and set size. */
158
    #define ALLOC_SP_INT_SIZE(n, s, err, h)                                    \
159
5.56M
    do {                                                                       \
160
5.56M
        ALLOC_SP_INT(n, s, err, h);                                            \
161
5.56M
        if ((err) == MP_OKAY) {                                                \
162
5.56M
            (n)->size = (sp_size_t)(s);                                        \
163
5.56M
        }                                                                      \
164
5.56M
    }                                                                          \
165
5.56M
    while (0)
166
#else
167
    /* Array declared on stack - check size is valid. */
168
    #define ALLOC_SP_INT(n, s, err, h)                                         \
169
    do {                                                                       \
170
        if (((err) == MP_OKAY) && ((s) > (int)SP_INT_DIGITS)) {                \
171
            (err) = MP_VAL;                                                    \
172
        }                                                                      \
173
    }                                                                          \
174
    while (0)
175
176
    /* Array declared on stack - set the size field. */
177
    #define ALLOC_SP_INT_SIZE(n, s, err, h)                                    \
178
    do {                                                                       \
179
        ALLOC_SP_INT(n, s, err, h);                                            \
180
        if ((err) == MP_OKAY) {                                                \
181
            (n)->size = (sp_size_t)(s);                                        \
182
        }                                                                      \
183
    }                                                                          \
184
    while (0)
185
#endif
186
187
/* FREE_SP_INT: Free an 'sp_int' variable. */
188
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
189
    !defined(WOLFSSL_SP_NO_MALLOC)
190
    /* Free dynamically allocated data. */
191
    #define FREE_SP_INT(n, h)                   \
192
5.62M
    do {                                        \
193
5.62M
        if ((n) != NULL) {                      \
194
5.62M
            XFREE(n, h, DYNAMIC_TYPE_BIGINT);   \
195
5.62M
        }                                       \
196
5.62M
    }                                           \
197
5.62M
    while (0)
198
#else
199
    /* Nothing to do as declared on stack. */
200
    #define FREE_SP_INT(n, h) WC_DO_NOTHING
201
#endif
202
203
204
/* Declare data pointer set on XMALLOC and a zeroed array of sp_int pointers. */
205
#define DECL_DYN_SP_INT_ARRAY(n, s, c)               \
206
23.3M
    sp_int* n##d = NULL;                             \
207
23.3M
    sp_int* (n)[c];                                  \
208
23.3M
    void *n ## _dummy_var = XMEMSET(n, 0, sizeof(n))
209
210
/* DECL_SP_INT_ARRAY: Declare array of 'sp_int'. */
211
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
212
    !defined(WOLFSSL_SP_NO_MALLOC)
213
    /* Declare a variable that will be assigned a value on XMALLOC. */
214
    #define DECL_SP_INT_ARRAY(n, s, c)  \
215
23.2M
        DECL_DYN_SP_INT_ARRAY(n, s, c)
216
#elif defined(WOLFSSL_SP_DYN_STACK)
217
    /* Declare a variable on the stack with the required data size. */
218
    #define DECL_SP_INT_ARRAY(n, s, c)                    \
219
        sp_int_digit n##d[MP_INT_SIZEOF_DIGITS(s) * (c)]; \
220
        sp_int* (n)[c] = { NULL, }
221
#else
222
    /* Declare a variable on the stack. */
223
    #define DECL_SP_INT_ARRAY(n, s, c)      \
224
        sp_int n##d[c];                     \
225
        sp_int* (n)[c]
226
#endif
227
228
/* Dynamically allocate just enough data to support multiple sp_ints of the
229
 * required size. Use pointers into data to make up array and set sizes.
230
 */
231
14.6M
#define ALLOC_DYN_SP_INT_ARRAY(n, s, c, err, h)                                \
232
14.6M
do {                                                                           \
233
14.6M
    (void)n ## _dummy_var;                                                     \
234
14.6M
    if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) {                         \
235
0
        (err) = MP_VAL;                                                        \
236
0
    }                                                                          \
237
14.6M
    if ((err) == MP_OKAY) {                                                    \
238
14.6M
        n##d = (sp_int*)XMALLOC(MP_INT_SIZEOF(s) * (c), (h),                   \
239
14.6M
                                                         DYNAMIC_TYPE_BIGINT); \
240
14.6M
        if (n##d == NULL) {                                                    \
241
1.74k
            (err) = MP_MEM;                                                    \
242
1.74k
        }                                                                      \
243
14.6M
        else {                                                                 \
244
14.6M
            int n##ii;                                                         \
245
14.6M
            (n)[0] = n##d;                                                     \
246
14.6M
            (n)[0]->size = (sp_size_t)(s);                                     \
247
37.0M
            for (n##ii = 1; n##ii < (int)(c); n##ii++) {                       \
248
22.3M
                (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s);                     \
249
22.3M
                (n)[n##ii]->size = (sp_size_t)(s);                             \
250
22.3M
            }                                                                  \
251
14.6M
        }                                                                      \
252
14.6M
    }                                                                          \
253
14.6M
}                                                                              \
254
14.6M
while (0)
255
256
/* ALLOC_SP_INT_ARRAY: Allocate an array of 'sp_int's of required size. */
257
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
258
    !defined(WOLFSSL_SP_NO_MALLOC)
259
    #define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
260
14.6M
        ALLOC_DYN_SP_INT_ARRAY(n, s, c, err, h)
261
#elif defined(WOLFSSL_SP_DYN_STACK)
262
    /* Data declared on stack that supports multiple sp_ints of the
263
     * required size. Use pointers into data to make up array and set sizes.
264
     */
265
    #define ALLOC_SP_INT_ARRAY(n, s, c, err, h)                                \
266
    do {                                                                       \
267
        if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) {                     \
268
            (err) = MP_VAL;                                                    \
269
        }                                                                      \
270
        if ((err) == MP_OKAY) {                                                \
271
            int n##ii;                                                         \
272
            (n)[0] = (sp_int*)n##d;                                            \
273
            ((sp_int_minimal*)(n)[0])->size = (sp_size_t)(s);                  \
274
            for (n##ii = 1; n##ii < (int)(c); n##ii++) {                       \
275
                (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s);                     \
276
                ((sp_int_minimal*)(n)[n##ii])->size = (sp_size_t)(s);          \
277
            }                                                                  \
278
        }                                                                      \
279
    }                                                                          \
280
    while (0)
281
#else
282
    /* Data declared on stack that supports multiple sp_ints of the
283
     * required size. Set into array and set sizes.
284
     */
285
    #define ALLOC_SP_INT_ARRAY(n, s, c, err, h)                                \
286
    do {                                                                       \
287
        if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) {                     \
288
            (err) = MP_VAL;                                                    \
289
        }                                                                      \
290
        if ((err) == MP_OKAY) {                                                \
291
            int n##ii;                                                         \
292
            for (n##ii = 0; n##ii < (int)(c); n##ii++) {                       \
293
                (n)[n##ii] = &n##d[n##ii];                                     \
294
                (n)[n##ii]->size = (sp_size_t)(s);                             \
295
            }                                                                  \
296
        }                                                                      \
297
    }                                                                          \
298
    while (0)
299
#endif
300
301
/* Free data variable that was dynamically allocated. */
302
23.3M
#define FREE_DYN_SP_INT_ARRAY(n, h)             \
303
23.3M
do {                                            \
304
23.3M
    if (n##d != NULL) {                         \
305
14.6M
        XFREE(n##d, h, DYNAMIC_TYPE_BIGINT);    \
306
14.6M
    }                                           \
307
23.3M
}                                               \
308
23.3M
while (0)
309
310
/* FREE_SP_INT_ARRAY: Free an array of 'sp_int'. */
311
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
312
    !defined(WOLFSSL_SP_NO_MALLOC)
313
    #define FREE_SP_INT_ARRAY(n, h)                 \
314
23.2M
        FREE_DYN_SP_INT_ARRAY(n, h)
315
#else
316
    /* Nothing to do as data declared on stack. */
317
    #define FREE_SP_INT_ARRAY(n, h) WC_DO_NOTHING
318
#endif
319
320
321
#ifndef WOLFSSL_NO_ASM
322
    #ifdef __IAR_SYSTEMS_ICC__
323
        #define __asm__        asm
324
        #define __volatile__   volatile
325
    #endif /* __IAR_SYSTEMS_ICC__ */
326
    #ifdef __KEIL__
327
        #define __asm__        __asm
328
        #define __volatile__   volatile
329
    #endif
330
331
    #if defined(WOLFSSL_SP_X86_64) && SP_WORD_SIZE == 64
332
/*
333
 * CPU: x86_64
334
 */
335
336
#ifndef _MSC_VER
337
/* Multiply va by vb and store double size result in: vh | vl */
338
#define SP_ASM_MUL(vl, vh, va, vb)                       \
339
    __asm__ __volatile__ (                               \
340
        "movq %[b], %%rax \n\t"                    \
341
        "mulq %[a]    \n\t"                    \
342
        "movq %%rax, %[l] \n\t"                    \
343
        "movq %%rdx, %[h] \n\t"                    \
344
        : [h] "+r" (vh), [l] "+r" (vl)                   \
345
        : [a] "rm" (va), [b] "rm" (vb)                   \
346
        : "%rax", "%rdx", "cc"                           \
347
    )
348
/* Multiply va by vb and store double size result in: vo | vh | vl */
349
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
350
    __asm__ __volatile__ (                               \
351
        "movq %[b], %%rax \n\t"                    \
352
        "mulq %[a]    \n\t"                    \
353
        "movq $0   , %[o] \n\t"                    \
354
        "movq %%rax, %[l] \n\t"                    \
355
        "movq %%rdx, %[h] \n\t"                    \
356
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
357
        : [a] "m" (va), [b] "m" (vb)                     \
358
        : "%rax", "%rdx", "cc"                           \
359
    )
360
/* Multiply va by vb and add double size result into: vo | vh | vl */
361
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
362
    __asm__ __volatile__ (                               \
363
        "movq %[b], %%rax \n\t"                    \
364
        "mulq %[a]    \n\t"                    \
365
        "addq %%rax, %[l] \n\t"                    \
366
        "adcq %%rdx, %[h] \n\t"                    \
367
        "adcq $0   , %[o] \n\t"                    \
368
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
369
        : [a] "rm" (va), [b] "rm" (vb)                   \
370
        : "%rax", "%rdx", "cc"                           \
371
    )
372
/* Multiply va by vb and add double size result into: vh | vl */
373
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
374
    __asm__ __volatile__ (                               \
375
        "movq %[b], %%rax \n\t"                    \
376
        "mulq %[a]    \n\t"                    \
377
        "addq %%rax, %[l] \n\t"                    \
378
        "adcq %%rdx, %[h] \n\t"                    \
379
        : [l] "+r" (vl), [h] "+r" (vh)                   \
380
        : [a] "rm" (va), [b] "rm" (vb)                   \
381
        : "%rax", "%rdx", "cc"                           \
382
    )
383
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
384
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
385
    __asm__ __volatile__ (                               \
386
        "movq %[b], %%rax \n\t"                    \
387
        "mulq %[a]    \n\t"                    \
388
        "addq %%rax, %[l] \n\t"                    \
389
        "adcq %%rdx, %[h] \n\t"                    \
390
        "adcq $0   , %[o] \n\t"                    \
391
        "addq %%rax, %[l] \n\t"                    \
392
        "adcq %%rdx, %[h] \n\t"                    \
393
        "adcq $0   , %[o] \n\t"                    \
394
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
395
        : [a] "rm" (va), [b] "rm" (vb)                   \
396
        : "%rax", "%rdx", "cc"                           \
397
    )
398
/* Multiply va by vb and add double size result twice into: vo | vh | vl
399
 * Assumes first add will not overflow vh | vl
400
 */
401
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
402
    __asm__ __volatile__ (                               \
403
        "movq %[b], %%rax \n\t"                    \
404
        "mulq %[a]    \n\t"                    \
405
        "addq %%rax, %[l] \n\t"                    \
406
        "adcq %%rdx, %[h] \n\t"                    \
407
        "addq %%rax, %[l] \n\t"                    \
408
        "adcq %%rdx, %[h] \n\t"                    \
409
        "adcq $0   , %[o] \n\t"                    \
410
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
411
        : [a] "rm" (va), [b] "rm" (vb)                   \
412
        : "%rax", "%rdx", "cc"                           \
413
    )
414
/* Square va and store double size result in: vh | vl */
415
#define SP_ASM_SQR(vl, vh, va)                           \
416
    __asm__ __volatile__ (                               \
417
        "movq %[a], %%rax \n\t"                    \
418
        "mulq %%rax   \n\t"                    \
419
        "movq %%rax, %[l] \n\t"                    \
420
        "movq %%rdx, %[h] \n\t"                    \
421
        : [h] "+r" (vh), [l] "+r" (vl)                   \
422
        : [a] "rm" (va)                                  \
423
        : "%rax", "%rdx", "cc"                           \
424
    )
425
/* Square va and add double size result into: vo | vh | vl */
426
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
427
    __asm__ __volatile__ (                               \
428
        "movq %[a], %%rax \n\t"                    \
429
        "mulq %%rax   \n\t"                    \
430
        "addq %%rax, %[l] \n\t"                    \
431
        "adcq %%rdx, %[h] \n\t"                    \
432
        "adcq $0   , %[o] \n\t"                    \
433
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
434
        : [a] "rm" (va)                                  \
435
        : "%rax", "%rdx", "cc"                           \
436
    )
437
/* Square va and add double size result into: vh | vl */
438
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
439
    __asm__ __volatile__ (                               \
440
        "movq %[a], %%rax \n\t"                    \
441
        "mulq %%rax   \n\t"                    \
442
        "addq %%rax, %[l] \n\t"                    \
443
        "adcq %%rdx, %[h] \n\t"                    \
444
        : [l] "+r" (vl), [h] "+r" (vh)                   \
445
        : [a] "rm" (va)                                  \
446
        : "%rax", "%rdx", "cc"                           \
447
    )
448
/* Add va into: vh | vl */
449
#define SP_ASM_ADDC(vl, vh, va)                          \
450
    __asm__ __volatile__ (                               \
451
        "addq %[a], %[l]  \n\t"                    \
452
        "adcq $0  , %[h]  \n\t"                    \
453
        : [l] "+r" (vl), [h] "+r" (vh)                   \
454
        : [a] "rm" (va)                                  \
455
        : "cc"                                           \
456
    )
457
#define SP_ASM_ADDC_REG(vl, vh, va)                      \
458
    __asm__ __volatile__ (                               \
459
        "addq %[a], %[l]  \n\t"                    \
460
        "adcq $0  , %[h]  \n\t"                    \
461
        : [l] "+r" (vl), [h] "+r" (vh)                   \
462
        : [a] "r" (va)                                   \
463
        : "cc"                                           \
464
    )
465
/* Sub va from: vh | vl */
466
#define SP_ASM_SUBB(vl, vh, va)                          \
467
    __asm__ __volatile__ (                               \
468
        "subq %[a], %[l]  \n\t"                    \
469
        "sbbq $0  , %[h]  \n\t"                    \
470
        : [l] "+r" (vl), [h] "+r" (vh)                   \
471
        : [a] "rm" (va)                                  \
472
        : "cc"                                           \
473
    )
474
/* Sub va, variable in a register, from: vh | vl */
475
#define SP_ASM_SUBB_REG(vl, vh, va)                      \
476
    __asm__ __volatile__ (                               \
477
        "subq %[a], %[l]  \n\t"                    \
478
        "sbbq $0  , %[h]  \n\t"                    \
479
        : [l] "+r" (vl), [h] "+r" (vh)                   \
480
        : [a] "r" (va)                                   \
481
        : "cc"                                           \
482
    )
483
/* Add two times vc | vb | va into vo | vh | vl */
484
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
485
    __asm__ __volatile__ (                               \
486
        "addq %[a], %[l]  \n\t"                    \
487
        "adcq %[b], %[h]  \n\t"                    \
488
        "adcq %[c], %[o]  \n\t"                    \
489
        "addq %[a], %[l]  \n\t"                    \
490
        "adcq %[b], %[h]  \n\t"                    \
491
        "adcq %[c], %[o]  \n\t"                    \
492
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
493
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
494
        : "cc"                                           \
495
    )
496
/* Index of highest bit set. */
497
#define SP_ASM_HI_BIT_SET_IDX(va, vi)                    \
498
    __asm__ __volatile__ (                               \
499
        "bsr  %[a], %[i]  \n\t"                    \
500
        : [i] "=r" (vi)                                  \
501
        : [a] "r" (va)                                   \
502
        : "cc"                                           \
503
    )
504
#else
505
#include <intrin.h>
506
507
/* Multiply va by vb and store double size result in: vh | vl */
508
#define SP_ASM_MUL(vl, vh, va, vb)                       \
509
    vl = _umul128(va, vb, &vh)
510
511
/* Multiply va by vb and store double size result in: vo | vh | vl */
512
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
513
    do {                                                 \
514
        vl = _umul128(va, vb, &vh);                      \
515
        vo = 0;                                          \
516
    }                                                    \
517
    while (0)
518
519
/* Multiply va by vb and add double size result into: vo | vh | vl */
520
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
521
    do {                                                 \
522
        unsigned __int64 vtl, vth;                       \
523
        unsigned char c;                                 \
524
        vtl = _umul128(va, vb, &vth);                    \
525
        c = _addcarry_u64(0, vl, vtl, &vl);              \
526
        c = _addcarry_u64(c, vh, vth, &vh);              \
527
            _addcarry_u64(c, vo,   0, &vo);              \
528
    }                                                    \
529
    while (0)
530
531
/* Multiply va by vb and add double size result into: vh | vl */
532
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
533
    do {                                                 \
534
        unsigned __int64 vtl, vth;                       \
535
        unsigned char c;                                 \
536
        vtl = _umul128(va, vb, &vth);                    \
537
        c = _addcarry_u64(0, vl, vtl, &vl);              \
538
            _addcarry_u64(c, vh, vth, &vh);              \
539
    }                                                    \
540
    while (0)
541
542
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
543
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
544
    do {                                                 \
545
        unsigned __int64 vtl, vth;                       \
546
        unsigned char c;                                 \
547
        vtl = _umul128(va, vb, &vth);                    \
548
        c = _addcarry_u64(0, vl, vtl, &vl);              \
549
        c = _addcarry_u64(c, vh, vth, &vh);              \
550
            _addcarry_u64(c, vo,   0, &vo);              \
551
        c = _addcarry_u64(0, vl, vtl, &vl);              \
552
        c = _addcarry_u64(c, vh, vth, &vh);              \
553
            _addcarry_u64(c, vo,   0, &vo);              \
554
    }                                                    \
555
    while (0)
556
/* Multiply va by vb and add double size result twice into: vo | vh | vl
557
 * Assumes first add will not overflow vh | vl
558
 */
559
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
560
    do {                                                 \
561
        unsigned __int64 vtl, vth;                       \
562
        unsigned char c;                                 \
563
        vtl = _umul128(va, vb, &vth);                    \
564
        c = _addcarry_u64(0, vl, vtl, &vl);              \
565
            _addcarry_u64(c, vh, vth, &vh);              \
566
        c = _addcarry_u64(0, vl, vtl, &vl);              \
567
        c = _addcarry_u64(c, vh, vth, &vh);              \
568
            _addcarry_u64(c, vo,   0, &vo);              \
569
    }                                                    \
570
    while (0)
571
572
 /* Square va and store double size result in: vh | vl */
573
#define SP_ASM_SQR(vl, vh, va)                           \
574
    vl = _umul128(va, va, &vh)
575
576
/* Square va and add double size result into: vo | vh | vl */
577
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
578
    do {                                                 \
579
        unsigned __int64 vtl, vth;                       \
580
        unsigned char c;                                 \
581
        vtl = _umul128(va, va, &vth);                    \
582
        c = _addcarry_u64(0, vl, vtl, &vl);              \
583
        c = _addcarry_u64(c, vh, vth, &vh);              \
584
            _addcarry_u64(c, vo,   0, &vo);              \
585
    }                                                    \
586
    while (0)
587
588
/* Square va and add double size result into: vh | vl */
589
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
590
    do {                                                 \
591
        unsigned __int64 vtl, vth;                       \
592
        unsigned char c;                                 \
593
        vtl = _umul128(va, va, &vth);                    \
594
        c = _addcarry_u64(0, vl, vtl, &vl);              \
595
            _addcarry_u64(c, vh, vth, &vh);              \
596
    }                                                    \
597
    while (0)
598
599
/* Add va into: vh | vl */
600
#define SP_ASM_ADDC(vl, vh, va)                          \
601
    do {                                                 \
602
        unsigned char c;                                 \
603
        c = _addcarry_u64(0, vl, va, &vl);               \
604
            _addcarry_u64(c, vh,  0, &vh);               \
605
    }                                                    \
606
    while (0)
607
608
/* Add va, variable in a register, into: vh | vl */
609
#define SP_ASM_ADDC_REG(vl, vh, va)                      \
610
    do {                                                 \
611
        unsigned char c;                                 \
612
        c = _addcarry_u64(0, vl, va, &vl);               \
613
            _addcarry_u64(c, vh,  0, &vh);               \
614
    }                                                    \
615
    while (0)
616
617
/* Sub va from: vh | vl */
618
#define SP_ASM_SUBB(vl, vh, va)                          \
619
    do {                                                 \
620
        unsigned char c;                                 \
621
        c = _subborrow_u64(0, vl, va, &vl);              \
622
            _subborrow_u64(c, vh,  0, &vh);              \
623
    }                                                    \
624
    while (0)
625
626
/* Add two times vc | vb | va into vo | vh | vl */
627
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
628
    do {                                                 \
629
        unsigned char c;                                 \
630
        c = _addcarry_u64(0, vl, va, &vl);               \
631
        c = _addcarry_u64(c, vh, vb, &vh);               \
632
            _addcarry_u64(c, vo, vc, &vo);               \
633
        c = _addcarry_u64(0, vl, va, &vl);               \
634
        c = _addcarry_u64(c, vh, vb, &vh);               \
635
            _addcarry_u64(c, vo, vc, &vo);               \
636
    }                                                    \
637
    while (0)
638
/* Index of highest bit set.
 *
 * Scans va with _BitScanReverse64 and stores the resulting bit index in vi.
 * NOTE(review): the intrinsic's return value is ignored, so va == 0 is not
 * detected here - presumably callers only pass non-zero digits; confirm.
 */
639
#define SP_ASM_HI_BIT_SET_IDX(va, vi)                    \
640
    do {                                                 \
641
        unsigned long idx;                               \
642
        _BitScanReverse64(&idx, va);                     \
643
        vi = idx;                                        \
644
    }                                                    \
645
    while (0)
646
#endif
647
648
#if !defined(WOLFSSL_SP_DIV_WORD_HALF) && (!defined(_MSC_VER) || \
649
    _MSC_VER >= 1920)
650
/* Divide a two digit number by a digit number and return. (hi | lo) / d
651
 *
652
 * Using divq instruction on Intel x64.
653
 *
654
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
655
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
656
 * @param  [in]  d   SP integer digit. Number to divide by.
657
 * @return  The division result.
658
 */
659
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
660
                                          sp_int_digit d)
661
{
662
#ifndef _MSC_VER
663
    __asm__ __volatile__ (
664
        "divq %2"
665
        : "+a" (lo)
666
        : "d" (hi), "r" (d)
667
        : "cc"
668
    );
669
    return lo;
670
#elif defined(_MSC_VER) && _MSC_VER >= 1920
671
    return _udiv128(hi, lo, d, NULL);
672
#endif
673
}
674
#define SP_ASM_DIV_WORD
675
#endif
676
677
#define SP_INT_ASM_AVAILABLE
678
679
    #endif /* WOLFSSL_SP_X86_64 && SP_WORD_SIZE == 64 */
680
681
    #if defined(WOLFSSL_SP_X86) && SP_WORD_SIZE == 32
682
/*
683
 * CPU: x86
684
 */
685
686
/* Multiply va by vb and store double size result in: vh | vl */
687
#define SP_ASM_MUL(vl, vh, va, vb)                       \
688
    __asm__ __volatile__ (                               \
689
        "movl %[b], %%eax \n\t"                    \
690
        "mull %[a]    \n\t"                    \
691
        "movl %%eax, %[l] \n\t"                    \
692
        "movl %%edx, %[h] \n\t"                    \
693
        : [h] "+r" (vh), [l] "+r" (vl)                   \
694
        : [a] "rm" (va), [b] "rm" (vb)                   \
695
        : "eax", "edx", "cc"                             \
696
    )
697
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * vl and vh receive the low and high words of the product and vo is set to 0.
 * va and vb are taken as memory operands. EAX and EDX are used as scratch.
 */
698
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
699
    __asm__ __volatile__ (                               \
700
        "movl %[b], %%eax \n\t"                    \
701
        "mull %[a]    \n\t"                    \
702
        "movl $0   , %[o] \n\t"                    \
703
        "movl %%eax, %[l] \n\t"                    \
704
        "movl %%edx, %[h] \n\t"                    \
705
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
706
        : [a] "m" (va), [b] "m" (vb)                     \
707
        : "eax", "edx", "cc"                             \
708
    )
709
/* Multiply va by vb and add double size result into: vo | vh | vl */
710
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
711
    __asm__ __volatile__ (                               \
712
        "movl %[b], %%eax \n\t"                    \
713
        "mull %[a]    \n\t"                    \
714
        "addl %%eax, %[l] \n\t"                    \
715
        "adcl %%edx, %[h] \n\t"                    \
716
        "adcl $0   , %[o] \n\t"                    \
717
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
718
        : [a] "rm" (va), [b] "rm" (vb)                   \
719
        : "eax", "edx", "cc"                             \
720
    )
721
/* Multiply va by vb and add double size result into: vh | vl */
722
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
723
    __asm__ __volatile__ (                               \
724
        "movl %[b], %%eax \n\t"                    \
725
        "mull %[a]    \n\t"                    \
726
        "addl %%eax, %[l] \n\t"                    \
727
        "adcl %%edx, %[h] \n\t"                    \
728
        : [l] "+r" (vl), [h] "+r" (vh)                   \
729
        : [a] "rm" (va), [b] "rm" (vb)                   \
730
        : "eax", "edx", "cc"                             \
731
    )
732
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
733
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
734
    __asm__ __volatile__ (                               \
735
        "movl %[b], %%eax \n\t"                    \
736
        "mull %[a]    \n\t"                    \
737
        "addl %%eax, %[l] \n\t"                    \
738
        "adcl %%edx, %[h] \n\t"                    \
739
        "adcl $0   , %[o] \n\t"                    \
740
        "addl %%eax, %[l] \n\t"                    \
741
        "adcl %%edx, %[h] \n\t"                    \
742
        "adcl $0   , %[o] \n\t"                    \
743
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
744
        : [a] "rm" (va), [b] "rm" (vb)                   \
745
        : "eax", "edx", "cc"                             \
746
    )
747
/* Multiply va by vb and add double size result twice into: vo | vh | vl
748
 * Assumes first add will not overflow vh | vl
749
 */
750
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
751
    __asm__ __volatile__ (                               \
752
        "movl %[b], %%eax \n\t"                    \
753
        "mull %[a]    \n\t"                    \
754
        "addl %%eax, %[l] \n\t"                    \
755
        "adcl %%edx, %[h] \n\t"                    \
756
        "addl %%eax, %[l] \n\t"                    \
757
        "adcl %%edx, %[h] \n\t"                    \
758
        "adcl $0   , %[o] \n\t"                    \
759
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
760
        : [a] "rm" (va), [b] "rm" (vb)                   \
761
        : "eax", "edx", "cc"                             \
762
    )
763
/* Square va and store double size result in: vh | vl */
764
#define SP_ASM_SQR(vl, vh, va)                           \
765
    __asm__ __volatile__ (                               \
766
        "movl %[a], %%eax \n\t"                    \
767
        "mull %%eax   \n\t"                    \
768
        "movl %%eax, %[l] \n\t"                    \
769
        "movl %%edx, %[h] \n\t"                    \
770
        : [h] "+r" (vh), [l] "+r" (vl)                   \
771
        : [a] "rm" (va)                                  \
772
        : "eax", "edx", "cc"                             \
773
    )
774
/* Square va and add double size result into: vo | vh | vl */
775
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
776
    __asm__ __volatile__ (                               \
777
        "movl %[a], %%eax \n\t"                    \
778
        "mull %%eax   \n\t"                    \
779
        "addl %%eax, %[l] \n\t"                    \
780
        "adcl %%edx, %[h] \n\t"                    \
781
        "adcl $0   , %[o] \n\t"                    \
782
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
783
        : [a] "rm" (va)                                  \
784
        : "eax", "edx", "cc"                             \
785
    )
786
/* Square va and add double size result into: vh | vl */
787
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
788
    __asm__ __volatile__ (                               \
789
        "movl %[a], %%eax \n\t"                    \
790
        "mull %%eax   \n\t"                    \
791
        "addl %%eax, %[l] \n\t"                    \
792
        "adcl %%edx, %[h] \n\t"                    \
793
        : [l] "+r" (vl), [h] "+r" (vh)                   \
794
        : [a] "rm" (va)                                  \
795
        : "eax", "edx", "cc"                             \
796
    )
797
/* Add va into: vh | vl */
798
#define SP_ASM_ADDC(vl, vh, va)                          \
799
    __asm__ __volatile__ (                               \
800
        "addl %[a], %[l]  \n\t"                    \
801
        "adcl $0  , %[h]  \n\t"                    \
802
        : [l] "+r" (vl), [h] "+r" (vh)                   \
803
        : [a] "rm" (va)                                  \
804
        : "cc"                                           \
805
    )
806
/* Add va, variable in a register, into: vh | vl */
#define SP_ASM_ADDC_REG(vl, vh, va)                      \
807
    __asm__ __volatile__ (                               \
808
        "addl %[a], %[l]  \n\t"                    \
809
        "adcl $0  , %[h]  \n\t"                    \
810
        : [l] "+r" (vl), [h] "+r" (vh)                   \
811
        : [a] "r" (va)                                   \
812
        : "cc"                                           \
813
    )
814
/* Sub va from: vh | vl */
815
#define SP_ASM_SUBB(vl, vh, va)                          \
816
    __asm__ __volatile__ (                               \
817
        "subl %[a], %[l]  \n\t"                    \
818
        "sbbl $0  , %[h]  \n\t"                    \
819
        : [l] "+r" (vl), [h] "+r" (vh)                   \
820
        : [a] "rm" (va)                                  \
821
        : "cc"                                           \
822
    )
823
/* Sub va, variable in a register, from: vh | vl */
824
#define SP_ASM_SUBB_REG(vl, vh, va)                      \
825
    __asm__ __volatile__ (                               \
826
        "subl %[a], %[l]  \n\t"                    \
827
        "sbbl $0  , %[h]  \n\t"                    \
828
        : [l] "+r" (vl), [h] "+r" (vh)                   \
829
        : [a] "r" (va)                                   \
830
        : "cc"                                           \
831
    )
832
/* Add two times vc | vb | va into vo | vh | vl */
833
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
834
    __asm__ __volatile__ (                               \
835
        "addl %[a], %[l]  \n\t"                    \
836
        "adcl %[b], %[h]  \n\t"                    \
837
        "adcl %[c], %[o]  \n\t"                    \
838
        "addl %[a], %[l]  \n\t"                    \
839
        "adcl %[b], %[h]  \n\t"                    \
840
        "adcl %[c], %[o]  \n\t"                    \
841
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
842
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
843
        : "cc"                                           \
844
    )
845
/* Index of highest bit set.
 *
 * Uses the bsr instruction to write the bit index found in va into vi.
 * NOTE(review): no check is made for va == 0 here - presumably callers only
 * pass non-zero digits; confirm.
 */
846
#define SP_ASM_HI_BIT_SET_IDX(va, vi)                    \
847
    __asm__ __volatile__ (                               \
848
        "bsr  %[a], %[i]  \n\t"                    \
849
        : [i] "=r" (vi)                                  \
850
        : [a] "r" (va)                                   \
851
        : "cc"                                           \
852
    )
853
854
#ifndef WOLFSSL_SP_DIV_WORD_HALF
855
/* Divide a two digit number by a digit number and return. (hi | lo) / d
856
 *
857
 * Using divl instruction on Intel x64.
858
 *
859
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
860
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
861
 * @param  [in]  d   SP integer digit. Number to divide by.
862
 * @return  The division result.
863
 */
864
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
865
                                          sp_int_digit d)
866
{
867
    __asm__ __volatile__ (
868
        "divl %2"
869
        : "+a" (lo)
870
        : "d" (hi), "r" (d)
871
        : "cc"
872
    );
873
    return lo;
874
}
875
#define SP_ASM_DIV_WORD
876
#endif
877
878
#define SP_INT_ASM_AVAILABLE
879
880
    #endif /* WOLFSSL_SP_X86 && SP_WORD_SIZE == 32 */
881
882
    #if defined(WOLFSSL_SP_ARM64) && SP_WORD_SIZE == 64
883
/*
884
 * CPU: Aarch64
885
 */
886
887
/* Multiply va by vb and store double size result in: vh | vl */
888
#define SP_ASM_MUL(vl, vh, va, vb)                       \
889
    __asm__ __volatile__ (                               \
890
        "mul  %[l], %[a], %[b]  \n\t"            \
891
        "umulh  %[h], %[a], %[b]  \n\t"            \
892
        : [h] "+r" (vh), [l] "+r" (vl)                   \
893
        : [a] "r" (va), [b] "r" (vb)                     \
894
        : "cc"                                           \
895
    )
896
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The low word of the product is formed in scratch register x8 and only
 * moved into vl after the high word has been computed into vh. vo is set to
 * zero.
 */
897
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
898
    __asm__ __volatile__ (                               \
899
        "mul  x8, %[a], %[b]    \n\t"            \
900
        "umulh  %[h], %[a], %[b]  \n\t"            \
901
        "mov  %[l], x8    \n\t"            \
902
        "mov  %[o], xzr   \n\t"            \
903
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
904
        : [a] "r" (va), [b] "r" (vb)                     \
905
        : "x8", "cc"                                     \
906
    )
907
/* Multiply va by vb and add double size result into: vo | vh | vl */
908
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
909
    __asm__ __volatile__ (                               \
910
        "mul  x8, %[a], %[b]    \n\t"            \
911
        "umulh  x9, %[a], %[b]    \n\t"            \
912
        "adds %[l], %[l], x8    \n\t"            \
913
        "adcs %[h], %[h], x9    \n\t"            \
914
        "adc  %[o], %[o], xzr   \n\t"            \
915
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
916
        : [a] "r" (va), [b] "r" (vb)                     \
917
        : "x8", "x9", "cc"                               \
918
    )
919
/* Multiply va by vb and add double size result into: vh | vl */
920
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
921
    __asm__ __volatile__ (                               \
922
        "mul  x8, %[a], %[b]    \n\t"            \
923
        "umulh  x9, %[a], %[b]    \n\t"            \
924
        "adds %[l], %[l], x8    \n\t"            \
925
        "adc  %[h], %[h], x9    \n\t"            \
926
        : [l] "+r" (vl), [h] "+r" (vh)                   \
927
        : [a] "r" (va), [b] "r" (vb)                     \
928
        : "x8", "x9", "cc"                               \
929
    )
930
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
931
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
932
    __asm__ __volatile__ (                               \
933
        "mul  x8, %[a], %[b]    \n\t"            \
934
        "umulh  x9, %[a], %[b]    \n\t"            \
935
        "adds %[l], %[l], x8    \n\t"            \
936
        "adcs %[h], %[h], x9    \n\t"            \
937
        "adc  %[o], %[o], xzr   \n\t"            \
938
        "adds %[l], %[l], x8    \n\t"            \
939
        "adcs %[h], %[h], x9    \n\t"            \
940
        "adc  %[o], %[o], xzr   \n\t"            \
941
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
942
        : [a] "r" (va), [b] "r" (vb)                     \
943
        : "x8", "x9", "cc"                               \
944
    )
945
/* Multiply va by vb and add double size result twice into: vo | vh | vl
946
 * Assumes first add will not overflow vh | vl
947
 */
948
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
949
    __asm__ __volatile__ (                               \
950
        "mul  x8, %[a], %[b]    \n\t"            \
951
        "umulh  x9, %[a], %[b]    \n\t"            \
952
        "adds %[l], %[l], x8    \n\t"            \
953
        "adc  %[h], %[h], x9    \n\t"            \
954
        "adds %[l], %[l], x8    \n\t"            \
955
        "adcs %[h], %[h], x9    \n\t"            \
956
        "adc  %[o], %[o], xzr   \n\t"            \
957
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
958
        : [a] "r" (va), [b] "r" (vb)                     \
959
        : "x8", "x9", "cc"                               \
960
    )
961
/* Square va and store double size result in: vh | vl */
962
#define SP_ASM_SQR(vl, vh, va)                           \
963
    __asm__ __volatile__ (                               \
964
        "mul  %[l], %[a], %[a]  \n\t"            \
965
        "umulh  %[h], %[a], %[a]  \n\t"            \
966
        : [h] "+r" (vh), [l] "+r" (vl)                   \
967
        : [a] "r" (va)                                   \
968
        : "cc"                                           \
969
    )
970
/* Square va and add double size result into: vo | vh | vl */
971
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
972
    __asm__ __volatile__ (                               \
973
        "mul  x8, %[a], %[a]    \n\t"            \
974
        "umulh  x9, %[a], %[a]    \n\t"            \
975
        "adds %[l], %[l], x8    \n\t"            \
976
        "adcs %[h], %[h], x9    \n\t"            \
977
        "adc  %[o], %[o], xzr   \n\t"            \
978
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
979
        : [a] "r" (va)                                   \
980
        : "x8", "x9", "cc"                               \
981
    )
982
/* Square va and add double size result into: vh | vl */
983
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
984
    __asm__ __volatile__ (                               \
985
        "mul  x8, %[a], %[a]    \n\t"            \
986
        "umulh  x9, %[a], %[a]    \n\t"            \
987
        "adds %[l], %[l], x8    \n\t"            \
988
        "adc  %[h], %[h], x9    \n\t"            \
989
        : [l] "+r" (vl), [h] "+r" (vh)                   \
990
        : [a] "r" (va)                                   \
991
        : "x8", "x9", "cc"                               \
992
    )
993
/* Add va into: vh | vl */
994
#define SP_ASM_ADDC(vl, vh, va)                          \
995
    __asm__ __volatile__ (                               \
996
        "adds %[l], %[l], %[a]  \n\t"            \
997
        "adc  %[h], %[h], xzr   \n\t"            \
998
        : [l] "+r" (vl), [h] "+r" (vh)                   \
999
        : [a] "r" (va)                                   \
1000
        : "cc"                                           \
1001
    )
1002
/* Sub va from: vh | vl */
1003
#define SP_ASM_SUBB(vl, vh, va)                          \
1004
    __asm__ __volatile__ (                               \
1005
        "subs %[l], %[l], %[a]  \n\t"            \
1006
        "sbc  %[h], %[h], xzr   \n\t"            \
1007
        : [l] "+r" (vl), [h] "+r" (vh)                   \
1008
        : [a] "r" (va)                                   \
1009
        : "cc"                                           \
1010
    )
1011
/* Add two times vc | vb | va into vo | vh | vl */
1012
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
1013
    __asm__ __volatile__ (                               \
1014
        "adds %[l], %[l], %[a]  \n\t"            \
1015
        "adcs %[h], %[h], %[b]  \n\t"            \
1016
        "adc  %[o], %[o], %[c]  \n\t"            \
1017
        "adds %[l], %[l], %[a]  \n\t"            \
1018
        "adcs %[h], %[h], %[b]  \n\t"            \
1019
        "adc  %[o], %[o], %[c]  \n\t"            \
1020
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1021
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
1022
        : "cc"                                           \
1023
    )
1024
/* Count leading zeros. */
1025
#define SP_ASM_LZCNT(va, vn)                             \
1026
    __asm__ __volatile__ (                               \
1027
        "clz  %[n], %[a]  \n\t"                    \
1028
        : [n] "=r" (vn)                                  \
1029
        : [a] "r" (va)                                   \
1030
        :                                                \
1031
    )
1032
1033
#ifndef WOLFSSL_SP_DIV_WORD_HALF
1034
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using udiv instruction on Aarch64.
 * Constant time.
 *
 * The quotient is accumulated in x6 from a series of estimates. Each estimate
 * is a udiv by the top 32 bits of the (shifted) divisor plus one, after which
 * the estimate times d is subtracted from hi | lo. A final udiv of the
 * remaining low word by d is added to the accumulated quotient.
 *
 * hi, lo and d are all modified by the assembly; x3-x6 are used as scratch.
 *
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
 * @param  [in]  d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
1044
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
1045
                                          sp_int_digit d)
1046
{
1047
    __asm__ __volatile__ (
1048
        /* x3 = 16 when the top 16 bits of d are zero, otherwise 0.
         * x4 = 63 - x3. */
        "lsr  x3, %[d], 48\n\t"
1049
        "mov  x5, 16\n\t"
1050
        "cmp  x3, 0\n\t"
1051
        "mov  x4, 63\n\t"
1052
        "csel x3, x5, xzr, eq\n\t"
1053
        "sub  x4, x4, x3\n\t"
1054
        /* Shift d and hi | lo left by x3 bits. The bits leaving lo are moved
         * into hi with a shift by x4 followed by a further shift by 1. */
        "lsl  %[d], %[d], x3\n\t"
1055
        "lsl  %[hi], %[hi], x3\n\t"
1056
        "lsr  x5, %[lo], x4\n\t"
1057
        "lsl  %[lo], %[lo], x3\n\t"
1058
        "orr  %[hi], %[hi], x5, lsr 1\n\t"
1059
1060
        /* x5 = (top 32 bits of d) + 1: divisor used for the estimates. */
        "lsr  x5, %[d], 32\n\t"
1061
        "add  x5, x5, 1\n\t"
1062
1063
        /* First estimate from hi, placed in the top half of the quotient
         * (x6). Subtract d * estimate from hi | lo. */
        "udiv x3, %[hi], x5\n\t"
1064
        "lsl  x6, x3, 32\n\t"
1065
        "mul  x4, %[d], x6\n\t"
1066
        "umulh  x3, %[d], x6\n\t"
1067
        "subs %[lo], %[lo], x4\n\t"
1068
        "sbc  %[hi], %[hi], x3\n\t"
1069
1070
        /* Second estimate from the updated hi, again for the top half: add
         * to x6 and subtract d * estimate from hi | lo. */
        "udiv x3, %[hi], x5\n\t"
1071
        "lsl  x3, x3, 32\n\t"
1072
        "add  x6, x6, x3\n\t"
1073
        "mul  x4, %[d], x3\n\t"
1074
        "umulh  x3, %[d], x3\n\t"
1075
        "subs %[lo], %[lo], x4\n\t"
1076
        "sbc  %[hi], %[hi], x3\n\t"
1077
1078
        /* x3 = (hi | lo) >> 32, the middle 64 bits of what remains. */
        "lsr  x3, %[lo], 32\n\t"
1079
        "orr  x3, x3, %[hi], lsl 32\n\t"
1080
1081
        /* Estimate for the bottom half: add to x6 and subtract
         * d * estimate from hi | lo. */
        "udiv x3, x3, x5\n\t"
1082
        "add  x6, x6, x3\n\t"
1083
        "mul  x4, %[d], x3\n\t"
1084
        "umulh  x3, %[d], x3\n\t"
1085
        "subs %[lo], %[lo], x4\n\t"
1086
        "sbc  %[hi], %[hi], x3\n\t"
1087
1088
        /* x3 = (hi | lo) >> 32 again for the next estimate. */
        "lsr  x3, %[lo], 32\n\t"
1089
        "orr  x3, x3, %[hi], lsl 32\n\t"
1090
1091
        /* Another bottom half estimate; only the low word of the
         * subtraction is kept from here on. */
        "udiv x3, x3, x5\n\t"
1092
        "add  x6, x6, x3\n\t"
1093
        "mul  x4, %[d], x3\n\t"
1094
        "sub  %[lo], %[lo], x4\n\t"
1095
1096
        /* Divide what is left in lo by d and add to the accumulated
         * quotient. The result is left in hi. */
        "udiv x3, %[lo], %[d]\n\t"
1097
        "add  %[hi], x6, x3\n\t"
1098
1099
        : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
1100
        :
1101
        : "x3", "x4", "x5", "x6", "cc"
1102
    );
1103
1104
    return hi;
1105
}
1106
#define SP_ASM_DIV_WORD
1107
#endif
1108
1109
#define SP_INT_ASM_AVAILABLE
1110
1111
    #endif /* WOLFSSL_SP_ARM64 && SP_WORD_SIZE == 64 */
1112
1113
    #if (defined(WOLFSSL_SP_ARM32) || defined(WOLFSSL_SP_ARM_CORTEX_M)) && \
1114
        SP_WORD_SIZE == 32
1115
/*
1116
 * CPU: ARM32 or Cortex-M4 and similar
1117
 */
1118
1119
/* Multiply va by vb and store double size result in: vh | vl */
1120
#define SP_ASM_MUL(vl, vh, va, vb)                       \
1121
    __asm__ __volatile__ (                               \
1122
        "umull  %[l], %[h], %[a], %[b]  \n\t"            \
1123
        : [h] "+r" (vh), [l] "+r" (vl)                   \
1124
        : [a] "r" (va), [b] "r" (vb)                     \
1125
    )
1126
/* Multiply va by vb and store double size result in: vo | vh | vl */
1127
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
1128
    __asm__ __volatile__ (                               \
1129
        "umull  %[l], %[h], %[a], %[b]  \n\t"            \
1130
        "mov  %[o], #0    \n\t"            \
1131
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
1132
        : [a] "r" (va), [b] "r" (vb)                     \
1133
    )
1134
/* Multiply va by vb and add double size result into: vo | vh | vl */
1135
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1136
    __asm__ __volatile__ (                               \
1137
        "umull  r8, r9, %[a], %[b]  \n\t"            \
1138
        "adds %[l], %[l], r8    \n\t"            \
1139
        "adcs %[h], %[h], r9    \n\t"            \
1140
        "adc  %[o], %[o], #0    \n\t"            \
1141
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1142
        : [a] "r" (va), [b] "r" (vb)                     \
1143
        : "r8", "r9", "cc"                               \
1144
    )
1145
/* Multiply va by vb and add double size result into: vh | vl
 *
 * Done with a single umlal instruction that accumulates directly into
 * vh | vl. No scratch registers are used and no carry out of vh is recorded.
 */
1146
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
1147
    __asm__ __volatile__ (                               \
1148
        "umlal  %[l], %[h], %[a], %[b]  \n\t"            \
1149
        : [l] "+r" (vl), [h] "+r" (vh)                   \
1150
        : [a] "r" (va), [b] "r" (vb)                     \
1151
    )
1152
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
1153
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
1154
    __asm__ __volatile__ (                               \
1155
        "umull  r8, r9, %[a], %[b]  \n\t"            \
1156
        "adds %[l], %[l], r8    \n\t"            \
1157
        "adcs %[h], %[h], r9    \n\t"            \
1158
        "adc  %[o], %[o], #0    \n\t"            \
1159
        "adds %[l], %[l], r8    \n\t"            \
1160
        "adcs %[h], %[h], r9    \n\t"            \
1161
        "adc  %[o], %[o], #0    \n\t"            \
1162
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1163
        : [a] "r" (va), [b] "r" (vb)                     \
1164
        : "r8", "r9", "cc"                               \
1165
    )
1166
/* Multiply va by vb and add double size result twice into: vo | vh | vl
1167
 * Assumes first add will not overflow vh | vl
1168
 */
1169
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
1170
    __asm__ __volatile__ (                               \
1171
        "umull  r8, r9, %[a], %[b]  \n\t"            \
1172
        "adds %[l], %[l], r8    \n\t"            \
1173
        "adc  %[h], %[h], r9    \n\t"            \
1174
        "adds %[l], %[l], r8    \n\t"            \
1175
        "adcs %[h], %[h], r9    \n\t"            \
1176
        "adc  %[o], %[o], #0    \n\t"            \
1177
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1178
        : [a] "r" (va), [b] "r" (vb)                     \
1179
        : "r8", "r9", "cc"                               \
1180
    )
1181
/* Square va and store double size result in: vh | vl */
1182
#define SP_ASM_SQR(vl, vh, va)                           \
1183
    __asm__ __volatile__ (                               \
1184
        "umull  %[l], %[h], %[a], %[a]  \n\t"            \
1185
        : [h] "+r" (vh), [l] "+r" (vl)                   \
1186
        : [a] "r" (va)                                   \
1187
    )
1188
/* Square va and add double size result into: vo | vh | vl */
1189
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
1190
    __asm__ __volatile__ (                               \
1191
        "umull  r8, r9, %[a], %[a]  \n\t"            \
1192
        "adds %[l], %[l], r8    \n\t"            \
1193
        "adcs %[h], %[h], r9    \n\t"            \
1194
        "adc  %[o], %[o], #0    \n\t"            \
1195
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1196
        : [a] "r" (va)                                   \
1197
        : "r8", "r9", "cc"                               \
1198
    )
1199
/* Square va and add double size result into: vh | vl */
1200
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
1201
    __asm__ __volatile__ (                               \
1202
        "umlal  %[l], %[h], %[a], %[a]  \n\t"            \
1203
        : [l] "+r" (vl), [h] "+r" (vh)                   \
1204
        : [a] "r" (va)                                   \
1205
        : "cc"                                           \
1206
    )
1207
/* Add va into: vh | vl */
1208
#define SP_ASM_ADDC(vl, vh, va)                          \
1209
    __asm__ __volatile__ (                               \
1210
        "adds %[l], %[l], %[a]  \n\t"            \
1211
        "adc  %[h], %[h], #0    \n\t"            \
1212
        : [l] "+r" (vl), [h] "+r" (vh)                   \
1213
        : [a] "r" (va)                                   \
1214
        : "cc"                                           \
1215
    )
1216
/* Sub va from: vh | vl */
1217
#define SP_ASM_SUBB(vl, vh, va)                          \
1218
    __asm__ __volatile__ (                               \
1219
        "subs %[l], %[l], %[a]  \n\t"            \
1220
        "sbc  %[h], %[h], #0    \n\t"            \
1221
        : [l] "+r" (vl), [h] "+r" (vh)                   \
1222
        : [a] "r" (va)                                   \
1223
        : "cc"                                           \
1224
    )
1225
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three word value vc | vb | va is added to vo | vh | vl with carry
 * propagation, and the same addition is then performed a second time.
 * A carry out of vo is not recorded.
 */
1226
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
1227
    __asm__ __volatile__ (                               \
1228
        /* first addition */                             \
        "adds %[l], %[l], %[a]  \n\t"            \
1229
        "adcs %[h], %[h], %[b]  \n\t"            \
1230
        "adc  %[o], %[o], %[c]  \n\t"            \
1231
        /* second addition */                            \
        "adds %[l], %[l], %[a]  \n\t"            \
1232
        "adcs %[h], %[h], %[b]  \n\t"            \
1233
        "adc  %[o], %[o], %[c]  \n\t"            \
1234
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1235
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
1236
        : "cc"                                           \
1237
    )
1238
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 7)
1239
/* Count leading zeros of va into vn using the clz instruction.
 *
 * Only defined when WOLFSSL_ARM_ARCH is defined and >= 7 (enclosing #if).
 * vn is write-only ("=r"); va is not modified.
 */
1240
#define SP_ASM_LZCNT(va, vn)                             \
1241
    __asm__ __volatile__ (                               \
1242
        "clz  %[n], %[a]  \n\t"                    \
1243
        : [n] "=r" (vn)                                  \
1244
        : [a] "r" (va)                                   \
1245
    )
1246
#endif
1247
1248
#ifndef WOLFSSL_SP_DIV_WORD_HALF
1249
#ifndef WOLFSSL_SP_ARM32_UDIV
1250
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * No division instruction used - does operation bit by bit.
 * Constant time.
 *
 * Outline of the inline assembly:
 *  1. Normalize: obtain a shift count for d - with clz, or, when
 *     WOLFSSL_ARM_ARCH < 7, by smearing the top set bit downwards, adding
 *     one, multiplying by debruijn32_mul and indexing debruijn32 with the top
 *     5 bits of the product. d and hi | lo are shifted left by that count.
 *  2. Estimate: r5 = (d >> 1) + 1 is compared against the running value in r9
 *     once outside the loop and then 30 more times in the loop at label 1,
 *     shifting one bit of lo into r9 and one result bit into r each time.
 *     r is then doubled and incremented.
 *  3. Correct: twice, the high word of (hi | lo) - r * d is added to r.
 *     Finally 1 is added when the low word of lo - r * d is >= d.
 *
 * hi, lo and d are read-write asm operands, so the local parameter copies
 * are modified. r4, r5, r6, r8 and r9 are scratch registers.
 *
 * NOTE(review): no check for d == 0 is made in this function - presumably
 * callers only pass a non-zero divisor; confirm.
 *
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
 * @param  [in]  d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
1260
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
1261
                                          sp_int_digit d)
1262
{
1263
    sp_int_digit r = 0;
1264
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
1265
    static const char debruijn32[32] = {
1266
        0, 31, 9, 30, 3, 8, 13, 29, 2, 5, 7, 21, 12, 24, 28, 19,
1267
        1, 10, 4, 14, 6, 22, 25, 20, 11, 15, 23, 26, 16, 27, 17, 18
1268
    };
1269
    static const sp_uint32 debruijn32_mul = 0x076be629;
1270
#endif
1271
1272
    __asm__ __volatile__ (
1273
        /* Shift d so that top bit is set. */
1274
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
1275
        "ldr  r4, %[m]\n\t"
1276
        "mov  r5, %[d]\n\t"
1277
        "orr  r5, r5, r5, lsr #1\n\t"
1278
        "orr  r5, r5, r5, lsr #2\n\t"
1279
        "orr  r5, r5, r5, lsr #4\n\t"
1280
        "orr  r5, r5, r5, lsr #8\n\t"
1281
        "orr  r5, r5, r5, lsr #16\n\t"
1282
        "add  r5, r5, #1\n\t"
1283
        "mul  r6, r5, r4\n\t"
1284
        "lsr  r5, r6, #27\n\t"
1285
        "ldrb r5, [%[t], r5]\n\t"
1286
#else
1287
        "clz  r5, %[d]\n\t"
1288
#endif
1289
        /* r5 = shift count, r6 = 31 - shift count. */
        "rsb  r6, r5, #31\n\t"
1290
        "lsl  %[d], %[d], r5\n\t"
1291
        "lsl  %[hi], %[hi], r5\n\t"
1292
        "lsr  r9, %[lo], r6\n\t"
1293
        "lsl  %[lo], %[lo], r5\n\t"
1294
        "orr  %[hi], %[hi], r9, lsr #1\n\t"
1295
1296
        /* r5 = (d >> 1) + 1, r6 = working copy of lo, r9 = working hi. */
        "lsr  r5, %[d], #1\n\t"
1297
        "add  r5, r5, #1\n\t"
1298
        "mov  r6, %[lo]\n\t"
1299
        "mov  r9, %[hi]\n\t"
1300
        /* Do top 32 */
1301
        "subs r8, r5, r9\n\t"
1302
        "sbc  r8, r8, r8\n\t"
1303
        "add  %[r], %[r], %[r]\n\t"
1304
        "sub  %[r], %[r], r8\n\t"
1305
        "and  r8, r8, r5\n\t"
1306
        "subs r9, r9, r8\n\t"
1307
        /* Next 30 bits */
1308
        "mov  r4, #29\n\t"
1309
        "\n1:\n\t"
1310
        "movs r6, r6, lsl #1\n\t"
1311
        "adc  r9, r9, r9\n\t"
1312
        "subs r8, r5, r9\n\t"
1313
        "sbc  r8, r8, r8\n\t"
1314
        "add  %[r], %[r], %[r]\n\t"
1315
        "sub  %[r], %[r], r8\n\t"
1316
        "and  r8, r8, r5\n\t"
1317
        "subs r9, r9, r8\n\t"
1318
        "subs r4, r4, #1\n\t"
1319
        "bpl  1b\n\t"
1320
1321
        /* r = 2 * r + 1 */
        "add  %[r], %[r], %[r]\n\t"
1322
        "add  %[r], %[r], #1\n\t"
1323
1324
        /* Handle difference has hi word > 0. */
1325
        "umull  r4, r5, %[r], %[d]\n\t"
1326
        "subs r4, %[lo], r4\n\t"
1327
        "sbc  r5, %[hi], r5\n\t"
1328
        "add  %[r], %[r], r5\n\t"
1329
        "umull  r4, r5, %[r], %[d]\n\t"
1330
        "subs r4, %[lo], r4\n\t"
1331
        "sbc  r5, %[hi], r5\n\t"
1332
        "add  %[r], %[r], r5\n\t"
1333
1334
        /* Add 1 to result if bottom half of difference is >= d. */
1335
        "mul  r4, %[r], %[d]\n\t"
1336
        "subs r4, %[lo], r4\n\t"
1337
        "subs r9, %[d], r4\n\t"
1338
        "sbc  r8, r8, r8\n\t"
1339
        "sub  %[r], %[r], r8\n\t"
1340
        "subs r9, r9, #1\n\t"
1341
        "sbc  r8, r8, r8\n\t"
1342
        "sub  %[r], %[r], r8\n\t"
1343
        : [r] "+r" (r), [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
1344
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
1345
        : [t] "r" (debruijn32), [m] "m" (debruijn32_mul)
1346
#else
1347
        :
1348
#endif
1349
        : "r4", "r5", "r6", "r8", "r9", "cc"
1350
    );
1351
1352
    return r;
1353
}
1354
#else
1355
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using udiv instruction on arm32
 * Constant time.
 * NOTE(review): the latency of udiv can depend on its operand values on some
 * cores - confirm the constant-time statement for the intended targets.
 *
 * Outline of the inline assembly:
 *  1. When the top byte of d is zero, d and hi | lo are shifted left by 8
 *     (otherwise by 0).
 *  2. r5 = (d >> 16) + 1 is used as the divisor for four udiv estimates. After
 *     each one the estimate (shifted left by 16 for the first two) is added
 *     to the quotient in r6 and estimate * d is subtracted from hi | lo.
 *  3. A last udiv of lo by d is added to r6 and the sum is written to hi,
 *     which is the value returned.
 *
 * hi, lo and d are read-write asm operands, so the local parameter copies
 * are modified. r3, r4, r5 and r6 are scratch registers.
 *
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
 * @param  [in]  d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
1365
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
1366
                                          sp_int_digit d)
1367
{
1368
    __asm__ __volatile__ (
1369
        /* r3 = 8 when the top byte of d is zero, otherwise 0. */
        "lsrs r3, %[d], #24\n\t"
1370
  "it eq\n\t"
1371
        "moveq  r3, #8\n\t"
1372
  "it ne\n\t"
1373
        "movne  r3, #0\n\t"
1374
        "rsb  r4, r3, #31\n\t"
1375
        "lsl  %[d], %[d], r3\n\t"
1376
        "lsl  %[hi], %[hi], r3\n\t"
1377
        "lsr  r5, %[lo], r4\n\t"
1378
        "lsl  %[lo], %[lo], r3\n\t"
1379
        "orr  %[hi], %[hi], r5, lsr #1\n\t"
1380
1381
        /* r5 = (d >> 16) + 1 */
        "lsr  r5, %[d], 16\n\t"
1382
        "add  r5, r5, 1\n\t"
1383
1384
        /* First estimate, from hi. */
        "udiv r3, %[hi], r5\n\t"
1385
        "lsl  r6, r3, 16\n\t"
1386
        "umull  r4, r3, %[d], r6\n\t"
1387
        "subs %[lo], %[lo], r4\n\t"
1388
        "sbc  %[hi], %[hi], r3\n\t"
1389
1390
        /* Second estimate, from the reduced hi. */
        "udiv r3, %[hi], r5\n\t"
1391
        "lsl  r3, r3, 16\n\t"
1392
        "add  r6, r6, r3\n\t"
1393
        "umull  r4, r3, %[d], r3\n\t"
1394
        "subs %[lo], %[lo], r4\n\t"
1395
        "sbc  %[hi], %[hi], r3\n\t"
1396
1397
        /* Third estimate, from (hi << 16) | (lo >> 16). */
        "lsr  r3, %[lo], 16\n\t"
1398
        "orr  r3, r3, %[hi], lsl 16\n\t"
1399
1400
        "udiv r3, r3, r5\n\t"
1401
        "add  r6, r6, r3\n\t"
1402
        "umull  r4, r3, %[d], r3\n\t"
1403
        "subs %[lo], %[lo], r4\n\t"
1404
        "sbc  %[hi], %[hi], r3\n\t"
1405
1406
        /* Fourth estimate, from (hi << 16) | (lo >> 16). */
        "lsr  r3, %[lo], 16\n\t"
1407
        "orr  r3, r3, %[hi], lsl 16\n\t"
1408
1409
        "udiv r3, r3, r5\n\t"
1410
        "add  r6, r6, r3\n\t"
1411
        "mul  r4, %[d], r3\n\t"
1412
        "sub  %[lo], %[lo], r4\n\t"
1413
1414
        /* Last step divides the remaining lo by d itself. */
        "udiv r3, %[lo], %[d]\n\t"
1415
        "add  %[hi], r6, r3\n\t"
1416
1417
        : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
1418
        :
1419
        : "r3", "r4", "r5", "r6", "cc"
1420
    );
1421
1422
    return hi;
1423
}
1424
#endif
1425
1426
#define SP_ASM_DIV_WORD
1427
#endif
1428
1429
#define SP_INT_ASM_AVAILABLE
1430
1431
    #endif /* (WOLFSSL_SP_ARM32 || ARM_CORTEX_M) && SP_WORD_SIZE == 32 */
1432
1433
    #if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
1434
/*
1435
 * CPU: ARM Thumb (like Cortex-M0)
1436
 */
1437
1438
/* Compile with -fomit-frame-pointer, or similar, if compiler complains about
1439
 * usage of register 'r7'.
1440
 */
1441
1442
#if defined(__clang__)
1443
1444
/* Multiply va by vb and store double size result in: vh | vl
 *
 * With va = ah | al and vb = bh | bl split into 16-bit halves, computes
 *   vh | vl = al * bl + ((al * bh + ah * bl) << 16) + ((ah * bh) << 32)
 * using 32-bit muls on the halves.
 * All operands use the "l" register constraint. r4 and r6 are temporaries,
 * r5 holds zero for adding the carry.
 */
1445
#define SP_ASM_MUL(vl, vh, va, vb)                       \
1446
    __asm__ __volatile__ (                               \
1447
        /* al * bl */                                    \
1448
        "uxth r6, %[a]    \n\t"            \
1449
        "uxth %[l], %[b]    \n\t"            \
1450
        "muls %[l], r6    \n\t"            \
1451
        /* al * bh */                                    \
1452
        "lsrs r4, %[b], #16   \n\t"            \
1453
        "muls r6, r4      \n\t"            \
1454
        "lsrs %[h], r6, #16   \n\t"            \
1455
        "lsls r6, r6, #16   \n\t"            \
1456
        "adds %[l], %[l], r6    \n\t"            \
1457
        "movs r5, #0      \n\t"            \
1458
        "adcs %[h], r5    \n\t"            \
1459
        /* ah * bh */                                    \
1460
        "lsrs r6, %[a], #16   \n\t"            \
1461
        "muls r4, r6      \n\t"            \
1462
        "adds %[h], %[h], r4    \n\t"            \
1463
        /* ah * bl */                                    \
1464
        "uxth r4, %[b]    \n\t"            \
1465
        "muls r6, r4      \n\t"            \
1466
        "lsrs r4, r6, #16   \n\t"            \
1467
        "lsls r6, r6, #16   \n\t"            \
1468
        "adds %[l], %[l], r6    \n\t"            \
1469
        "adcs %[h], r4    \n\t"            \
1470
        : [h] "+l" (vh), [l] "+l" (vl)                   \
1471
        : [a] "l" (va), [b] "l" (vb)                     \
1472
        : "r4", "r5", "r6", "cc"                         \
1473
    )
1474
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * vh | vl receive the product, assembled from 16x16-bit partial products.
 * vo is loaded with 0 - it also serves as the zero addend when the first
 * carry is added to vh - and is not written again, so it is 0 on exit.
 * r5 and r6 are temporaries.
 */
1475
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
1476
    __asm__ __volatile__ (                               \
1477
        /* al * bl */                                    \
1478
        "uxth r6, %[a]    \n\t"            \
1479
        "uxth %[l], %[b]    \n\t"            \
1480
        "muls %[l], r6    \n\t"            \
1481
        /* al * bh */                                    \
1482
        "lsrs r5, %[b], #16   \n\t"            \
1483
        "muls r6, r5      \n\t"            \
1484
        "lsrs %[h], r6, #16   \n\t"            \
1485
        "lsls r6, r6, #16   \n\t"            \
1486
        "adds %[l], %[l], r6    \n\t"            \
1487
        "movs %[o], #0    \n\t"            \
1488
        "adcs %[h], %[o]    \n\t"            \
1489
        /* ah * bh */                                    \
1490
        "lsrs r6, %[a], #16   \n\t"            \
1491
        "muls r5, r6      \n\t"            \
1492
        "adds %[h], %[h], r5    \n\t"            \
1493
        /* ah * bl */                                    \
1494
        "uxth r5, %[b]    \n\t"            \
1495
        "muls r6, r5      \n\t"            \
1496
        "lsrs r5, r6, #16   \n\t"            \
1497
        "lsls r6, r6, #16   \n\t"            \
1498
        "adds %[l], %[l], r6    \n\t"            \
1499
        "adcs %[h], r5    \n\t"            \
1500
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1501
        : [a] "l" (va), [b] "l" (vb)                     \
1502
        : "r5", "r6", "cc"                               \
1503
    )
1504
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
1505
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Each 16x16-bit partial product is added on its own, and after every
 * addition the carry is propagated up into vo.
 * r5 holds zero for the whole sequence; r6 and r7 are temporaries.
 */
1506
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1507
    __asm__ __volatile__ (                               \
1508
        /* al * bl */                                    \
1509
        "uxth r6, %[a]    \n\t"            \
1510
        "uxth r7, %[b]    \n\t"            \
1511
        "muls r7, r6      \n\t"            \
1512
        "adds %[l], %[l], r7    \n\t"            \
1513
        "movs r5, #0      \n\t"            \
1514
        "adcs %[h], r5    \n\t"            \
1515
        "adcs %[o], r5    \n\t"            \
1516
        /* al * bh */                                    \
1517
        "lsrs r7, %[b], #16   \n\t"            \
1518
        "muls r6, r7      \n\t"            \
1519
        "lsrs r7, r6, #16   \n\t"            \
1520
        "lsls r6, r6, #16   \n\t"            \
1521
        "adds %[l], %[l], r6    \n\t"            \
1522
        "adcs %[h], r7    \n\t"            \
1523
        "adcs %[o], r5    \n\t"            \
1524
        /* ah * bh */                                    \
1525
        "lsrs r6, %[a], #16   \n\t"            \
1526
        "lsrs r7, %[b], #16   \n\t"            \
1527
        "muls r7, r6      \n\t"            \
1528
        "adds %[h], %[h], r7    \n\t"            \
1529
        "adcs %[o], r5    \n\t"            \
1530
        /* ah * bl */                                    \
1531
        "uxth r7, %[b]    \n\t"            \
1532
        "muls r6, r7      \n\t"            \
1533
        "lsrs r7, r6, #16   \n\t"            \
1534
        "lsls r6, r6, #16   \n\t"            \
1535
        "adds %[l], %[l], r6    \n\t"            \
1536
        "adcs %[h], r7    \n\t"            \
1537
        "adcs %[o], r5    \n\t"            \
1538
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1539
        : [a] "l" (va), [b] "l" (vb)                     \
1540
        : "r5", "r6", "r7", "cc"                         \
1541
    )
1542
#else
1543
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant that does not use r7: r5 is both the temporary for a partial
 * product and the zero addend, so it is set back to 0 before each carry is
 * added into vo. r6 is the other temporary.
 */
1544
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1545
    __asm__ __volatile__ (                               \
1546
        /* al * bl */                                    \
1547
        "uxth r6, %[a]    \n\t"            \
1548
        "uxth r5, %[b]    \n\t"            \
1549
        "muls r5, r6      \n\t"            \
1550
        "adds %[l], %[l], r5    \n\t"            \
1551
        "movs r5, #0      \n\t"            \
1552
        "adcs %[h], r5    \n\t"            \
1553
        "adcs %[o], r5    \n\t"            \
1554
        /* al * bh */                                    \
1555
        "lsrs r5, %[b], #16   \n\t"            \
1556
        "muls r6, r5      \n\t"            \
1557
        "lsrs r5, r6, #16   \n\t"            \
1558
        "lsls r6, r6, #16   \n\t"            \
1559
        "adds %[l], %[l], r6    \n\t"            \
1560
        "adcs %[h], r5    \n\t"            \
1561
        "movs r5, #0      \n\t"            \
1562
        "adcs %[o], r5    \n\t"            \
1563
        /* ah * bh */                                    \
1564
        "lsrs r6, %[a], #16   \n\t"            \
1565
        "lsrs r5, %[b], #16   \n\t"            \
1566
        "muls r5, r6      \n\t"            \
1567
        "adds %[h], %[h], r5    \n\t"            \
1568
        "movs r5, #0      \n\t"            \
1569
        "adcs %[o], r5    \n\t"            \
1570
        /* ah * bl */                                    \
1571
        "uxth r5, %[b]    \n\t"            \
1572
        "muls r6, r5      \n\t"            \
1573
        "lsrs r5, r6, #16   \n\t"            \
1574
        "lsls r6, r6, #16   \n\t"            \
1575
        "adds %[l], %[l], r6    \n\t"            \
1576
        "adcs %[h], r5    \n\t"            \
1577
        "movs r5, #0      \n\t"            \
1578
        "adcs %[o], r5    \n\t"            \
1579
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1580
        : [a] "l" (va), [b] "l" (vb)                     \
1581
        : "r5", "r6", "cc"                               \
1582
    )
1583
#endif
1584
/* Multiply va by vb and add double size result into: vh | vl
 *
 * Each 16x16-bit partial product is added on its own with the carry from vl
 * added to vh. There is no overflow word: a carry out of vh is not recorded.
 * r4 and r6 are temporaries, r5 holds zero.
 */
1585
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
1586
    __asm__ __volatile__ (                               \
1587
        /* al * bl */                                    \
1588
        "uxth r6, %[a]    \n\t"            \
1589
        "uxth r4, %[b]    \n\t"            \
1590
        "muls r4, r6      \n\t"            \
1591
        "adds %[l], %[l], r4    \n\t"            \
1592
        "movs r5, #0      \n\t"            \
1593
        "adcs %[h], r5    \n\t"            \
1594
        /* al * bh */                                    \
1595
        "lsrs r4, %[b], #16   \n\t"            \
1596
        "muls r6, r4      \n\t"            \
1597
        "lsrs r4, r6, #16   \n\t"            \
1598
        "lsls r6, r6, #16   \n\t"            \
1599
        "adds %[l], %[l], r6    \n\t"            \
1600
        "adcs %[h], r4    \n\t"            \
1601
        /* ah * bh */                                    \
1602
        "lsrs r6, %[a], #16   \n\t"            \
1603
        "lsrs r4, %[b], #16   \n\t"            \
1604
        "muls r4, r6      \n\t"            \
1605
        "adds %[h], %[h], r4    \n\t"            \
1606
        /* ah * bl */                                    \
1607
        "uxth r4, %[b]    \n\t"            \
1608
        "muls r6, r4      \n\t"            \
1609
        "lsrs r4, r6, #16   \n\t"            \
1610
        "lsls r6, r6, #16   \n\t"            \
1611
        "adds %[l], %[l], r6    \n\t"            \
1612
        "adcs %[h], r4    \n\t"            \
1613
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1614
        : [a] "l" (va), [b] "l" (vb)                     \
1615
        : "r4", "r5", "r6", "cc"                         \
1616
    )
1617
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
1618
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Each 16x16-bit partial product is computed once and then added two times
 * in succession, with the carry propagated up into vo after each addition.
 * r5 holds zero for the whole sequence; r6 and r7 are temporaries.
 */
1619
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
1620
    __asm__ __volatile__ (                               \
1621
        /* al * bl */                                    \
1622
        "uxth r6, %[a]    \n\t"            \
1623
        "uxth r7, %[b]    \n\t"            \
1624
        "muls r7, r6      \n\t"            \
1625
        "adds %[l], %[l], r7    \n\t"            \
1626
        "movs r5, #0      \n\t"            \
1627
        "adcs %[h], r5    \n\t"            \
1628
        "adcs %[o], r5    \n\t"            \
1629
        "adds %[l], %[l], r7    \n\t"            \
1630
        "adcs %[h], r5    \n\t"            \
1631
        "adcs %[o], r5    \n\t"            \
1632
        /* al * bh */                                    \
1633
        "lsrs r7, %[b], #16   \n\t"            \
1634
        "muls r6, r7      \n\t"            \
1635
        "lsrs r7, r6, #16   \n\t"            \
1636
        "lsls r6, r6, #16   \n\t"            \
1637
        "adds %[l], %[l], r6    \n\t"            \
1638
        "adcs %[h], r7    \n\t"            \
1639
        "adcs %[o], r5    \n\t"            \
1640
        "adds %[l], %[l], r6    \n\t"            \
1641
        "adcs %[h], r7    \n\t"            \
1642
        "adcs %[o], r5    \n\t"            \
1643
        /* ah * bh */                                    \
1644
        "lsrs r6, %[a], #16   \n\t"            \
1645
        "lsrs r7, %[b], #16   \n\t"            \
1646
        "muls r7, r6      \n\t"            \
1647
        "adds %[h], %[h], r7    \n\t"            \
1648
        "adcs %[o], r5    \n\t"            \
1649
        "adds %[h], %[h], r7    \n\t"            \
1650
        "adcs %[o], r5    \n\t"            \
1651
        /* ah * bl */                                    \
1652
        "uxth r7, %[b]    \n\t"            \
1653
        "muls r6, r7      \n\t"            \
1654
        "lsrs r7, r6, #16   \n\t"            \
1655
        "lsls r6, r6, #16   \n\t"            \
1656
        "adds %[l], %[l], r6    \n\t"            \
1657
        "adcs %[h], r7    \n\t"            \
1658
        "adcs %[o], r5    \n\t"            \
1659
        "adds %[l], %[l], r6    \n\t"            \
1660
        "adcs %[h], r7    \n\t"            \
1661
        "adcs %[o], r5    \n\t"            \
1662
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1663
        : [a] "l" (va), [b] "l" (vb)                     \
1664
        : "r5", "r6", "r7", "cc"                         \
1665
    )
1666
#else
1667
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant that does not use r7. The value of va is saved in r8 and the
 * register holding %[a] is reused as the zero addend for the carries; it is
 * reloaded from r8 before the high half of va is needed and again at the end.
 * r5 and r6 are temporaries.
 *
 * NOTE(review): %[a] is declared as an input-only operand but is written
 * (and restored) by this sequence. GCC's extended asm documentation says
 * input-only operands must not be modified - confirm this is safe, e.g. if
 * the compiler were to place %[a] and %[b] in the same register.
 * NOTE(review): "movs" is used with the high register r8 - confirm this
 * assembles for Thumb-1 only targets.
 */
1668
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
1669
    __asm__ __volatile__ (                               \
1670
        "movs r8, %[a]    \n\t"            \
1671
        /* al * bl */                                    \
1672
        "uxth r6, %[a]    \n\t"            \
1673
        "uxth r5, %[b]    \n\t"            \
1674
        "muls r5, r6      \n\t"            \
1675
        "adds %[l], %[l], r5    \n\t"            \
1676
        "movs %[a], #0    \n\t"            \
1677
        "adcs %[h], %[a]    \n\t"            \
1678
        "adcs %[o], %[a]    \n\t"            \
1679
        "adds %[l], %[l], r5    \n\t"            \
1680
        "adcs %[h], %[a]    \n\t"            \
1681
        "adcs %[o], %[a]    \n\t"            \
1682
        /* al * bh */                                    \
1683
        "lsrs r5, %[b], #16   \n\t"            \
1684
        "muls r6, r5      \n\t"            \
1685
        "lsrs r5, r6, #16   \n\t"            \
1686
        "lsls r6, r6, #16   \n\t"            \
1687
        "adds %[l], %[l], r6    \n\t"            \
1688
        "adcs %[h], r5    \n\t"            \
1689
        "adcs %[o], %[a]    \n\t"            \
1690
        "adds %[l], %[l], r6    \n\t"            \
1691
        "adcs %[h], r5    \n\t"            \
1692
        "adcs %[o], %[a]    \n\t"            \
1693
        /* ah * bh */                                    \
1694
        "movs %[a], r8    \n\t"            \
1695
        "lsrs r6, %[a], #16   \n\t"            \
1696
        "lsrs r5, %[b], #16   \n\t"            \
1697
        "muls r5, r6      \n\t"            \
1698
        "adds %[h], %[h], r5    \n\t"            \
1699
        "movs %[a], #0    \n\t"            \
1700
        "adcs %[o], %[a]    \n\t"            \
1701
        "adds %[h], %[h], r5    \n\t"            \
1702
        "adcs %[o], %[a]    \n\t"            \
1703
        /* ah * bl */                                    \
1704
        "uxth r5, %[b]    \n\t"            \
1705
        "muls r6, r5      \n\t"            \
1706
        "lsrs r5, r6, #16   \n\t"            \
1707
        "lsls r6, r6, #16   \n\t"            \
1708
        "adds %[l], %[l], r6    \n\t"            \
1709
        "adcs %[h], r5    \n\t"            \
1710
        "adcs %[o], %[a]    \n\t"            \
1711
        "adds %[l], %[l], r6    \n\t"            \
1712
        "adcs %[h], r5    \n\t"            \
1713
        "adcs %[o], %[a]    \n\t"            \
1714
        "movs %[a], r8    \n\t"            \
1715
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1716
        : [a] "l" (va), [b] "l" (vb)                     \
1717
        : "r5", "r6", "r8", "cc"                         \
1718
    )
1719
#endif
1720
#ifndef DEBUG
1721
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Each 16x16-bit partial product is computed once and added two times.
 * No carry is added to vo for al * bl or for the first addition of al * bh;
 * from the second addition of al * bh onwards the carry is propagated into
 * vo.
 * r5 holds zero; r6 and r7 are temporaries.
 */
1724
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
1725
    __asm__ __volatile__ (                               \
1726
        /* al * bl */                                    \
1727
        "uxth r6, %[a]    \n\t"            \
1728
        "uxth r7, %[b]    \n\t"            \
1729
        "muls r7, r6      \n\t"            \
1730
        "adds %[l], %[l], r7    \n\t"            \
1731
        "movs r5, #0      \n\t"            \
1732
        "adcs %[h], r5    \n\t"            \
1733
        "adds %[l], %[l], r7    \n\t"            \
1734
        "adcs %[h], r5    \n\t"            \
1735
        /* al * bh */                                    \
1736
        "lsrs r7, %[b], #16   \n\t"            \
1737
        "muls r6, r7      \n\t"            \
1738
        "lsrs r7, r6, #16   \n\t"            \
1739
        "lsls r6, r6, #16   \n\t"            \
1740
        "adds %[l], %[l], r6    \n\t"            \
1741
        "adcs %[h], r7    \n\t"            \
1742
        "adds %[l], %[l], r6    \n\t"            \
1743
        "adcs %[h], r7    \n\t"            \
1744
        "adcs %[o], r5    \n\t"            \
1745
        /* ah * bh */                                    \
1746
        "lsrs r6, %[a], #16   \n\t"            \
1747
        "lsrs r7, %[b], #16   \n\t"            \
1748
        "muls r7, r6      \n\t"            \
1749
        "adds %[h], %[h], r7    \n\t"            \
1750
        "adcs %[o], r5    \n\t"            \
1751
        "adds %[h], %[h], r7    \n\t"            \
1752
        "adcs %[o], r5    \n\t"            \
1753
        /* ah * bl */                                    \
1754
        "uxth r7, %[b]    \n\t"            \
1755
        "muls r6, r7      \n\t"            \
1756
        "lsrs r7, r6, #16   \n\t"            \
1757
        "lsls r6, r6, #16   \n\t"            \
1758
        "adds %[l], %[l], r6    \n\t"            \
1759
        "adcs %[h], r7    \n\t"            \
1760
        "adcs %[o], r5    \n\t"            \
1761
        "adds %[l], %[l], r6    \n\t"            \
1762
        "adcs %[h], r7    \n\t"            \
1763
        "adcs %[o], r5    \n\t"            \
1764
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1765
        : [a] "l" (va), [b] "l" (vb)                     \
1766
        : "r5", "r6", "r7", "cc"                         \
1767
    )
1768
#else
1769
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Variant that does not use r7. The value of va is saved in r8 and the
 * register holding %[a] is reused as the zero addend for the carries; it is
 * reloaded from r8 before the high half of va is needed and again at the end.
 * No carry is added to vo for al * bl or for the first addition of al * bh.
 * r5 and r6 are temporaries.
 *
 * NOTE(review): %[a] is declared as an input-only operand but is written
 * (and restored) by this sequence. GCC's extended asm documentation says
 * input-only operands must not be modified - confirm this is safe, e.g. if
 * the compiler were to place %[a] and %[b] in the same register.
 * NOTE(review): "movs" is used with the high register r8 - confirm this
 * assembles for Thumb-1 only targets.
 */
1772
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
1773
    __asm__ __volatile__ (                               \
1774
        "movs r8, %[a]    \n\t"            \
1775
        /* al * bl */                                    \
1776
        "uxth r5, %[a]    \n\t"            \
1777
        "uxth r6, %[b]    \n\t"            \
1778
        "muls r6, r5      \n\t"            \
1779
        "adds %[l], %[l], r6    \n\t"            \
1780
        "movs %[a], #0    \n\t"            \
1781
        "adcs %[h], %[a]    \n\t"            \
1782
        "adds %[l], %[l], r6    \n\t"            \
1783
        "adcs %[h], %[a]    \n\t"            \
1784
        /* al * bh */                                    \
1785
        "lsrs r6, %[b], #16   \n\t"            \
1786
        "muls r5, r6      \n\t"            \
1787
        "lsrs r6, r5, #16   \n\t"            \
1788
        "lsls r5, r5, #16   \n\t"            \
1789
        "adds %[l], %[l], r5    \n\t"            \
1790
        "adcs %[h], r6    \n\t"            \
1791
        "adds %[l], %[l], r5    \n\t"            \
1792
        "adcs %[h], r6    \n\t"            \
1793
        "adcs %[o], %[a]    \n\t"            \
1794
        /* ah * bh */                                    \
1795
        "movs %[a], r8    \n\t"            \
1796
        "lsrs r5, %[a], #16   \n\t"            \
1797
        "lsrs r6, %[b], #16   \n\t"            \
1798
        "muls r6, r5      \n\t"            \
1799
        "movs %[a], #0    \n\t"            \
1800
        "adds %[h], %[h], r6    \n\t"            \
1801
        "adcs %[o], %[a]    \n\t"            \
1802
        "adds %[h], %[h], r6    \n\t"            \
1803
        "adcs %[o], %[a]    \n\t"            \
1804
        /* ah * bl */                                    \
1805
        "uxth r6, %[b]    \n\t"            \
1806
        "muls r5, r6      \n\t"            \
1807
        "lsrs r6, r5, #16   \n\t"            \
1808
        "lsls r5, r5, #16   \n\t"            \
1809
        "adds %[l], %[l], r5    \n\t"            \
1810
        "adcs %[h], r6    \n\t"            \
1811
        "adcs %[o], %[a]    \n\t"            \
1812
        "adds %[l], %[l], r5    \n\t"            \
1813
        "adcs %[h], r6    \n\t"            \
1814
        "adcs %[o], %[a]    \n\t"            \
1815
        "movs %[a], r8    \n\t"            \
1816
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1817
        : [a] "l" (va), [b] "l" (vb)                     \
1818
        : "r5", "r6", "r8", "cc"                         \
1819
    )
1820
#endif
1821
/* Square va and store double size result in: vh | vl
 *
 * With va = ah | al split into 16-bit halves: vl is set to al * al, vh to
 * ah * ah, and the cross product al * ah is then added in doubled form -
 * its low bits shifted left by 17 into vl and its bits from 15 upwards, plus
 * the carry, into vh.
 * r5 (ah) and r6 (al) are temporaries.
 */
1822
#define SP_ASM_SQR(vl, vh, va)                           \
1823
    __asm__ __volatile__ (                               \
1824
        "lsrs r5, %[a], #16   \n\t"            \
1825
        "uxth r6, %[a]    \n\t"            \
1826
        "mov  %[l], r6    \n\t"            \
1827
        "mov  %[h], r5    \n\t"            \
1828
        /* al * al */                                    \
1829
        "muls %[l], %[l]    \n\t"            \
1830
        /* ah * ah */                                    \
1831
        "muls %[h], %[h]    \n\t"            \
1832
        /* 2 * al * ah */                                \
1833
        "muls r6, r5      \n\t"            \
1834
        "lsrs r5, r6, #15   \n\t"            \
1835
        "lsls r6, r6, #17   \n\t"            \
1836
        "adds %[l], %[l], r6    \n\t"            \
1837
        "adcs %[h], r5    \n\t"            \
1838
        : [h] "+l" (vh), [l] "+l" (vl)                   \
1839
        : [a] "l" (va)                                   \
1840
        : "r5", "r6", "cc"                               \
1841
    )
1842
/* Square va and add double size result into: vo | vh | vl
 *
 * With va = ah | al split into 16-bit halves: al * al is added to vl and
 * ah * ah to vh with the carry going into vo; the halves are then reloaded
 * and the cross product al * ah is added in doubled form (shifted left by
 * 17 into vl, right by 15 into vh), again with the carry going into vo.
 * r4 (ah) and r6 (al) are temporaries, r5 holds zero.
 */
1843
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
1844
    __asm__ __volatile__ (                               \
1845
        "lsrs r4, %[a], #16   \n\t"            \
1846
        "uxth r6, %[a]    \n\t"            \
1847
        /* al * al */                                    \
1848
        "muls r6, r6      \n\t"            \
1849
        /* ah * ah */                                    \
1850
        "muls r4, r4      \n\t"            \
1851
        "adds %[l], %[l], r6    \n\t"            \
1852
        "adcs %[h], r4    \n\t"            \
1853
        "movs r5, #0      \n\t"            \
1854
        "adcs %[o], r5    \n\t"            \
1855
        "lsrs r4, %[a], #16   \n\t"            \
1856
        "uxth r6, %[a]    \n\t"            \
1857
        /* 2 * al * ah */                                \
1858
        "muls r6, r4      \n\t"            \
1859
        "lsrs r4, r6, #15   \n\t"            \
1860
        "lsls r6, r6, #17   \n\t"            \
1861
        "adds %[l], %[l], r6    \n\t"            \
1862
        "adcs %[h], r4    \n\t"            \
1863
        "adcs %[o], r5    \n\t"            \
1864
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1865
        : [a] "l" (va)                                   \
1866
        : "r4", "r5", "r6", "cc"                         \
1867
    )
1868
/* Square va and add double size result into: vh | vl
 *
 * With va = ah | al split into 16-bit halves: al * al is added to vl and
 * ah * ah to vh; the halves are then reloaded and the cross product al * ah
 * is added in doubled form (shifted left by 17 into vl, right by 15 into
 * vh). There is no overflow word: a carry out of vh is not recorded.
 *
 * r5 holds the high half (ah) and r6 the low half (al). The two halves must
 * live in different registers - loading both into r6 loses ah before it is
 * used.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
    __asm__ __volatile__ (                               \
        "lsrs r5, %[a], #16   \n\t"            \
        "uxth r6, %[a]    \n\t"            \
        /* al * al */                                    \
        "muls r6, r6      \n\t"            \
        /* ah * ah */                                    \
        "muls r5, r5      \n\t"            \
        "adds %[l], %[l], r6    \n\t"            \
        "adcs %[h], r5    \n\t"            \
        "lsrs r5, %[a], #16   \n\t"            \
        "uxth r6, %[a]    \n\t"            \
        /* 2 * al * ah */                                \
        "muls r6, r5      \n\t"            \
        "lsrs r5, r6, #15   \n\t"            \
        "lsls r6, r6, #17   \n\t"            \
        "adds %[l], %[l], r6    \n\t"            \
        "adcs %[h], r5    \n\t"            \
        : [l] "+l" (vl), [h] "+l" (vh)                   \
        : [a] "l" (va)                                   \
        : "r5", "r6", "cc"                               \
    )
1891
/* Add va into: vh | vl
 *
 * vl += va, then the carry is added to vh using r5 as a zero addend.
 * A carry out of vh is not recorded.
 */
1892
#define SP_ASM_ADDC(vl, vh, va)                          \
1893
    __asm__ __volatile__ (                               \
1894
        "adds %[l], %[l], %[a]  \n\t"            \
1895
        "movs r5, #0      \n\t"            \
1896
        "adcs %[h], r5    \n\t"            \
1897
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1898
        : [a] "l" (va)                                   \
1899
        : "r5", "cc"                                     \
1900
    )
1901
/* Sub va from: vh | vl
 *
 * vl -= va, then the borrow from that subtraction is propagated into vh by a
 * subtract-with-carry of a zeroed r5.
 * r5 is zeroed between the two subtractions, so the sequence depends on that
 * move leaving the carry flag untouched.
 * Clobbers r5 and the condition flags.
 */
1902
#define SP_ASM_SUBB(vl, vh, va)                          \
1903
    __asm__ __volatile__ (                               \
1904
        "subs %[l], %[l], %[a]  \n\t"            \
1905
        "movs r5, #0      \n\t"            \
1906
        "sbcs %[h], r5    \n\t"            \
1907
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1908
        : [a] "l" (va)                                   \
1909
        : "r5", "cc"                                     \
1910
    )
1911
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word value vc | vb | va is added into vo | vh | vl with the carry
 * rippling from vl through vh to vo, and the same add chain is then executed
 * a second time.
 * A carry out of vo is not captured.
 * No scratch registers are used; only the condition flags are clobbered.
 */
1912
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
1913
    __asm__ __volatile__ (                               \
1914
        "adds %[l], %[l], %[a]  \n\t"            \
1915
        "adcs %[h], %[b]    \n\t"            \
1916
        "adcs %[o], %[c]    \n\t"            \
1917
        "adds %[l], %[l], %[a]  \n\t"            \
1918
        "adcs %[h], %[b]    \n\t"            \
1919
        "adcs %[o], %[c]    \n\t"            \
1920
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1921
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)       \
1922
        : "cc"                                           \
1923
    )
1924
1925
#elif defined(WOLFSSL_KEIL)
1926
1927
/* Multiply va by vb and store double size result in: vh | vl
 *
 * Both operands are split into 16-bit halves (al/ah and bl/bh) and the four
 * partial products are combined:
 *   al * bl  -> vl
 *   al * bh  -> split by shifts of 16 and added across vh | vl
 *   ah * bh  -> added into vh
 *   ah * bl  -> split by shifts of 16 and added across vh | vl
 * vl and vh are declared read-write but are overwritten before being read,
 * so their previous contents do not affect the result.
 * Clobbers r4, r5 (used as zero for the carry add), r6 and the condition
 * flags.
 */
1928
#define SP_ASM_MUL(vl, vh, va, vb)                       \
1929
    __asm__ __volatile__ (                               \
1930
        /* al * bl */                                    \
1931
        "uxth r6, %[a]    \n\t"            \
1932
        "uxth %[l], %[b]    \n\t"            \
1933
        "muls %[l], r6, %[l]    \n\t"            \
1934
        /* al * bh */                                    \
1935
        "lsrs r4, %[b], #16   \n\t"            \
1936
        "muls r6, r4, r6    \n\t"            \
1937
        "lsrs %[h], r6, #16   \n\t"            \
1938
        "lsls r6, r6, #16   \n\t"            \
1939
        "adds %[l], %[l], r6    \n\t"            \
1940
        "movs r5, #0      \n\t"            \
1941
        "adcs %[h], %[h], r5    \n\t"            \
1942
        /* ah * bh */                                    \
1943
        "lsrs r6, %[a], #16   \n\t"            \
1944
        "muls r4, r6, r4    \n\t"            \
1945
        "adds %[h], %[h], r4    \n\t"            \
1946
        /* ah * bl */                                    \
1947
        "uxth r4, %[b]    \n\t"            \
1948
        "muls r6, r4, r6    \n\t"            \
1949
        "lsrs r4, r6, #16   \n\t"            \
1950
        "lsls r6, r6, #16   \n\t"            \
1951
        "adds %[l], %[l], r6    \n\t"            \
1952
        "adcs %[h], %[h], r4    \n\t"            \
1953
        : [h] "+l" (vh), [l] "+l" (vl)                   \
1954
        : [a] "l" (va), [b] "l" (vb)                     \
1955
        : "r4", "r5", "r6", "cc"                         \
1956
    )
1957
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The product of va and vb is built from four 16x16 partial products and
 * written to vh | vl; vo is set to zero. After being zeroed, vo also serves
 * as the zero operand of the add-with-carry into vh.
 * Previous contents of vl, vh and vo are overwritten before being read.
 * Clobbers r5, r6 and the condition flags.
 */
1958
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
1959
    __asm__ __volatile__ (                               \
1960
        /* al * bl */                                    \
1961
        "uxth r6, %[a]    \n\t"            \
1962
        "uxth %[l], %[b]    \n\t"            \
1963
        "muls %[l], r6, %[l]    \n\t"            \
1964
        /* al * bh */                                    \
1965
        "lsrs r5, %[b], #16   \n\t"            \
1966
        "muls r6, r5, r6    \n\t"            \
1967
        "lsrs %[h], r6, #16   \n\t"            \
1968
        "lsls r6, r6, #16   \n\t"            \
1969
        "adds %[l], %[l], r6    \n\t"            \
1970
        "movs %[o], #0    \n\t"            \
1971
        "adcs %[h], %[h], %[o]  \n\t"            \
1972
        /* ah * bh */                                    \
1973
        "lsrs r6, %[a], #16   \n\t"            \
1974
        "muls r5, r6, r5    \n\t"            \
1975
        "adds %[h], %[h], r5    \n\t"            \
1976
        /* ah * bl */                                    \
1977
        "uxth r5, %[b]    \n\t"            \
1978
        "muls r6, r5, r6    \n\t"            \
1979
        "lsrs r5, r6, #16   \n\t"            \
1980
        "lsls r6, r6, #16   \n\t"            \
1981
        "adds %[l], %[l], r6    \n\t"            \
1982
        "adcs %[h], %[h], r5    \n\t"            \
1983
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1984
        : [a] "l" (va), [b] "l" (vb)                     \
1985
        : "r5", "r6", "cc"                               \
1986
    )
1987
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
1988
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant selected when neither WOLFSSL_SP_SMALL nor DEBUG is defined.
 * Each of the four 16x16 partial products (al*bl, al*bh, ah*bh, ah*bl) is
 * added into the accumulator as soon as it is computed, with the carry
 * propagated up through vh into vo after every addition.
 * r5 is zeroed once and kept as the zero operand for the carry adds; r6 and
 * r7 hold operand halves and partial products.
 * Clobbers r5, r6, r7 and the condition flags.
 */
1989
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1990
    __asm__ __volatile__ (                               \
1991
        /* al * bl */                                    \
1992
        "uxth r6, %[a]    \n\t"            \
1993
        "uxth r7, %[b]    \n\t"            \
1994
        "muls r7, r6, r7    \n\t"            \
1995
        "adds %[l], %[l], r7    \n\t"            \
1996
        "movs r5, #0      \n\t"            \
1997
        "adcs %[h], %[h], r5    \n\t"            \
1998
        "adcs %[o], %[o], r5    \n\t"            \
1999
        /* al * bh */                                    \
2000
        "lsrs r7, %[b], #16   \n\t"            \
2001
        "muls r6, r7, r6    \n\t"            \
2002
        "lsrs r7, r6, #16   \n\t"            \
2003
        "lsls r6, r6, #16   \n\t"            \
2004
        "adds %[l], %[l], r6    \n\t"            \
2005
        "adcs %[h], %[h], r7    \n\t"            \
2006
        "adcs %[o], %[o], r5    \n\t"            \
2007
        /* ah * bh */                                    \
2008
        "lsrs r6, %[a], #16   \n\t"            \
2009
        "lsrs r7, %[b], #16   \n\t"            \
2010
        "muls r7, r6, r7    \n\t"            \
2011
        "adds %[h], %[h], r7    \n\t"            \
2012
        "adcs %[o], %[o], r5    \n\t"            \
2013
        /* ah * bl */                                    \
2014
        "uxth r7, %[b]    \n\t"            \
2015
        "muls r6, r7, r6    \n\t"            \
2016
        "lsrs r7, r6, #16   \n\t"            \
2017
        "lsls r6, r6, #16   \n\t"            \
2018
        "adds %[l], %[l], r6    \n\t"            \
2019
        "adcs %[h], %[h], r7    \n\t"            \
2020
        "adcs %[o], %[o], r5    \n\t"            \
2021
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2022
        : [a] "l" (va), [b] "l" (vb)                     \
2023
        : "r5", "r6", "r7", "cc"                         \
2024
    )
2025
#else
2026
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant selected when WOLFSSL_SP_SMALL or DEBUG is defined. It performs the
 * same four partial-product additions using only r5 and r6 as scratch
 * registers (r7 is not touched).
 * Because r5 also holds operand halves and partial products, it is set back
 * to zero immediately before each add-with-carry that needs a zero operand.
 * Clobbers r5, r6 and the condition flags.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
2027
    __asm__ __volatile__ (                               \
2028
        /* al * bl */                                    \
2029
        "uxth   r6, %[a]                \n\t"            \
2030
        "uxth   r5, %[b]                \n\t"            \
2031
        "muls   r5, r6, r5              \n\t"            \
2032
        "adds   %[l], %[l], r5          \n\t"            \
2033
        "movs   r5, #0                  \n\t"            \
2034
        "adcs   %[h], %[h], r5          \n\t"            \
2035
        "adcs   %[o], %[o], r5          \n\t"            \
2036
        /* al * bh */                                    \
2037
        "lsrs   r5, %[b], #16           \n\t"            \
2038
        "muls   r6, r5, r6              \n\t"            \
2039
        "lsrs   r5, r6, #16             \n\t"            \
2040
        "lsls   r6, r6, #16             \n\t"            \
2041
        "adds   %[l], %[l], r6          \n\t"            \
2042
        "adcs   %[h], %[h], r5          \n\t"            \
2043
        "movs   r5, #0                  \n\t"            \
2044
        "adcs   %[o], %[o], r5          \n\t"            \
2045
        /* ah * bh */                                    \
2046
        "lsrs   r6, %[a], #16           \n\t"            \
2047
        "lsrs   r5, %[b], #16           \n\t"            \
2048
        "muls   r5, r6, r5              \n\t"            \
2049
        "adds   %[h], %[h], r5          \n\t"            \
2050
        "movs   r5, #0                  \n\t"            \
2051
        "adcs   %[o], %[o], r5          \n\t"            \
2052
        /* ah * bl */                                    \
2053
        "uxth   r5, %[b]                \n\t"            \
2054
        "muls   r6, r5, r6              \n\t"            \
2055
        "lsrs   r5, r6, #16             \n\t"            \
2056
        "lsls   r6, r6, #16             \n\t"            \
2057
        "adds   %[l], %[l], r6          \n\t"            \
2058
        "adcs   %[h], %[h], r5          \n\t"            \
2059
        "movs   r5, #0                  \n\t"            \
2060
        "adcs   %[o], %[o], r5          \n\t"            \
2061
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2062
        : [a] "l" (va), [b] "l" (vb)                     \
2063
        : "r5", "r6", "cc"                               \
2064
    )
2065
#endif
2066
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The four 16x16 partial products (al*bl, al*bh, ah*bh, ah*bl) are added
 * into the two-word accumulator vh | vl. There is no third accumulator word,
 * so a carry out of vh is not captured.
 * r5 is used as the zero operand for the first carry add; r4 and r6 hold
 * operand halves and partial products.
 * Clobbers r4, r5, r6 and the condition flags.
 */
2067
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
2068
    __asm__ __volatile__ (                               \
2069
        /* al * bl */                                    \
2070
        "uxth r6, %[a]    \n\t"            \
2071
        "uxth r4, %[b]    \n\t"            \
2072
        "muls r4, r6, r4    \n\t"            \
2073
        "adds %[l], %[l], r4    \n\t"            \
2074
        "movs r5, #0      \n\t"            \
2075
        "adcs %[h], %[h], r5    \n\t"            \
2076
        /* al * bh */                                    \
2077
        "lsrs r4, %[b], #16   \n\t"            \
2078
        "muls r6, r4, r6    \n\t"            \
2079
        "lsrs r4, r6, #16   \n\t"            \
2080
        "lsls r6, r6, #16   \n\t"            \
2081
        "adds %[l], %[l], r6    \n\t"            \
2082
        "adcs %[h], %[h], r4    \n\t"            \
2083
        /* ah * bh */                                    \
2084
        "lsrs r6, %[a], #16   \n\t"            \
2085
        "lsrs r4, %[b], #16   \n\t"            \
2086
        "muls r4, r6, r4    \n\t"            \
2087
        "adds %[h], %[h], r4    \n\t"            \
2088
        /* ah * bl */                                    \
2089
        "uxth r4, %[b]    \n\t"            \
2090
        "muls r6, r4, r6    \n\t"            \
2091
        "lsrs r4, r6, #16   \n\t"            \
2092
        "lsls r6, r6, #16   \n\t"            \
2093
        "adds %[l], %[l], r6    \n\t"            \
2094
        "adcs %[h], %[h], r4    \n\t"            \
2095
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2096
        : [a] "l" (va), [b] "l" (vb)                     \
2097
        : "r4", "r5", "r6", "cc"                         \
2098
    )
2099
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
2100
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant selected when neither WOLFSSL_SP_SMALL nor DEBUG is defined.
 * Each 16x16 partial product (al*bl, al*bh, ah*bh, ah*bl) is computed once
 * and then added into the accumulator two times in succession, each time
 * with the carry propagated through vh into vo.
 * r5 is zeroed once and kept as the zero operand for the carry adds; r6 and
 * r7 hold operand halves and partial products.
 * Clobbers r5, r6, r7 and the condition flags.
 */
2101
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
2102
    __asm__ __volatile__ (                               \
2103
        /* al * bl */                                    \
2104
        "uxth r6, %[a]    \n\t"            \
2105
        "uxth r7, %[b]    \n\t"            \
2106
        "muls r7, r6, r7    \n\t"            \
2107
        "adds %[l], %[l], r7    \n\t"            \
2108
        "movs r5, #0      \n\t"            \
2109
        "adcs %[h], %[h], r5    \n\t"            \
2110
        "adcs %[o], %[o], r5    \n\t"            \
2111
        "adds %[l], %[l], r7    \n\t"            \
2112
        "adcs %[h], %[h], r5    \n\t"            \
2113
        "adcs %[o], %[o], r5    \n\t"            \
2114
        /* al * bh */                                    \
2115
        "lsrs r7, %[b], #16   \n\t"            \
2116
        "muls r6, r7, r6    \n\t"            \
2117
        "lsrs r7, r6, #16   \n\t"            \
2118
        "lsls r6, r6, #16   \n\t"            \
2119
        "adds %[l], %[l], r6    \n\t"            \
2120
        "adcs %[h], %[h], r7    \n\t"            \
2121
        "adcs %[o], %[o], r5    \n\t"            \
2122
        "adds %[l], %[l], r6    \n\t"            \
2123
        "adcs %[h], %[h], r7    \n\t"            \
2124
        "adcs %[o], %[o], r5    \n\t"            \
2125
        /* ah * bh */                                    \
2126
        "lsrs r6, %[a], #16   \n\t"            \
2127
        "lsrs r7, %[b], #16   \n\t"            \
2128
        "muls r7, r6, r7    \n\t"            \
2129
        "adds %[h], %[h], r7    \n\t"            \
2130
        "adcs %[o], %[o], r5    \n\t"            \
2131
        "adds %[h], %[h], r7    \n\t"            \
2132
        "adcs %[o], %[o], r5    \n\t"            \
2133
        /* ah * bl */                                    \
2134
        "uxth r7, %[b]    \n\t"            \
2135
        "muls r6, r7, r6    \n\t"            \
2136
        "lsrs r7, r6, #16   \n\t"            \
2137
        "lsls r6, r6, #16   \n\t"            \
2138
        "adds %[l], %[l], r6    \n\t"            \
2139
        "adcs %[h], %[h], r7    \n\t"            \
2140
        "adcs %[o], %[o], r5    \n\t"            \
2141
        "adds %[l], %[l], r6    \n\t"            \
2142
        "adcs %[h], %[h], r7    \n\t"            \
2143
        "adcs %[o], %[o], r5    \n\t"            \
2144
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2145
        : [a] "l" (va), [b] "l" (vb)                     \
2146
        : "r5", "r6", "r7", "cc"                         \
2147
    )
2148
#else
2149
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant selected when WOLFSSL_SP_SMALL or DEBUG is defined; it avoids r7.
 * With only r5 and r6 available for operand halves and partial products,
 * the register holding va is borrowed as the zero operand for the carry adds:
 * va is first saved in r8, the register is zeroed, and it is reloaded from r8
 * before the high half of va is needed and again as the last instruction.
 * Each partial product is added into the accumulator twice, with the carry
 * propagated into vo.
 * NOTE(review): %[a] is declared as an input-only operand but is written
 * inside the asm; its value is restored before the asm ends - confirm this
 * is acceptable for the target compiler.
 * Clobbers r5, r6, r8 and the condition flags.
 */
2150
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
2151
    __asm__ __volatile__ (                               \
2152
        "movs r8, %[a]    \n\t"            \
2153
        /* al * bl */                                    \
2154
        "uxth r6, %[a]    \n\t"            \
2155
        "uxth r5, %[b]    \n\t"            \
2156
        "muls r5, r6, r5    \n\t"            \
2157
        "adds %[l], %[l], r5    \n\t"            \
2158
        "movs %[a], #0    \n\t"            \
2159
        "adcs %[h], %[h], %[a]  \n\t"            \
2160
        "adcs %[o], %[o], %[a]  \n\t"            \
2161
        "adds %[l], %[l], r5    \n\t"            \
2162
        "adcs %[h], %[h], %[a]  \n\t"            \
2163
        "adcs %[o], %[o], %[a]  \n\t"            \
2164
        /* al * bh */                                    \
2165
        "lsrs r5, %[b], #16   \n\t"            \
2166
        "muls r6, r5, r6    \n\t"            \
2167
        "lsrs r5, r6, #16   \n\t"            \
2168
        "lsls r6, r6, #16   \n\t"            \
2169
        "adds %[l], %[l], r6    \n\t"            \
2170
        "adcs %[h], %[h], r5    \n\t"            \
2171
        "adcs %[o], %[o], %[a]  \n\t"            \
2172
        "adds %[l], %[l], r6    \n\t"            \
2173
        "adcs %[h], %[h], r5    \n\t"            \
2174
        "adcs %[o], %[o], %[a]  \n\t"            \
2175
        /* ah * bh */                                    \
2176
        "movs %[a], r8    \n\t"            \
2177
        "lsrs r6, %[a], #16   \n\t"            \
2178
        "lsrs r5, %[b], #16   \n\t"            \
2179
        "muls r5, r6, r5    \n\t"            \
2180
        "adds %[h], %[h], r5    \n\t"            \
2181
        "movs %[a], #0    \n\t"            \
2182
        "adcs %[o], %[o], %[a]  \n\t"            \
2183
        "adds %[h], %[h], r5    \n\t"            \
2184
        "adcs %[o], %[o], %[a]  \n\t"            \
2185
        /* ah * bl */                                    \
2186
        "uxth r5, %[b]    \n\t"            \
2187
        "muls r6, r5, r6    \n\t"            \
2188
        "lsrs r5, r6, #16   \n\t"            \
2189
        "lsls r6, r6, #16   \n\t"            \
2190
        "adds %[l], %[l], r6    \n\t"            \
2191
        "adcs %[h], %[h], r5    \n\t"            \
2192
        "adcs %[o], %[o], %[a]  \n\t"            \
2193
        "adds %[l], %[l], r6    \n\t"            \
2194
        "adcs %[h], %[h], r5    \n\t"            \
2195
        "adcs %[o], %[o], %[a]  \n\t"            \
2196
        "movs %[a], r8    \n\t"            \
2197
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2198
        : [a] "l" (va), [b] "l" (vb)                     \
2199
        : "r5", "r6", "r8", "cc"                         \
2200
    )
2201
#endif
2202
#ifndef DEBUG
2203
/* Multiply va by vb and add double size result twice into: vo | vh | vl
2204
 * Assumes first add will not overflow vh | vl
2205
 *
 * Variant selected when DEBUG is not defined.
 * Each 16x16 partial product is added into the accumulator twice. In line
 * with the assumption above, no carry is propagated into vo for the al*bl
 * product, and for al*bh only the second of the two adds carries into vo;
 * the ah*bh and ah*bl products carry into vo on both adds.
 * r5 is zeroed once and kept as the zero operand; r6 and r7 hold operand
 * halves and partial products.
 * Clobbers r5, r6, r7 and the condition flags.
 */
2206
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
2207
    __asm__ __volatile__ (                               \
2208
        /* al * bl */                                    \
2209
        "uxth r6, %[a]    \n\t"            \
2210
        "uxth r7, %[b]    \n\t"            \
2211
        "muls r7, r6, r7    \n\t"            \
2212
        "adds %[l], %[l], r7    \n\t"            \
2213
        "movs r5, #0      \n\t"            \
2214
        "adcs %[h], %[h], r5    \n\t"            \
2215
        "adds %[l], %[l], r7    \n\t"            \
2216
        "adcs %[h], %[h], r5    \n\t"            \
2217
        /* al * bh */                                    \
2218
        "lsrs r7, %[b], #16   \n\t"            \
2219
        "muls r6, r7, r6    \n\t"            \
2220
        "lsrs r7, r6, #16   \n\t"            \
2221
        "lsls r6, r6, #16   \n\t"            \
2222
        "adds %[l], %[l], r6    \n\t"            \
2223
        "adcs %[h], %[h], r7    \n\t"            \
2224
        "adds %[l], %[l], r6    \n\t"            \
2225
        "adcs %[h], %[h], r7    \n\t"            \
2226
        "adcs %[o], %[o], r5    \n\t"            \
2227
        /* ah * bh */                                    \
2228
        "lsrs r6, %[a], #16   \n\t"            \
2229
        "lsrs r7, %[b], #16   \n\t"            \
2230
        "muls r7, r6, r7    \n\t"            \
2231
        "adds %[h], %[h], r7    \n\t"            \
2232
        "adcs %[o], %[o], r5    \n\t"            \
2233
        "adds %[h], %[h], r7    \n\t"            \
2234
        "adcs %[o], %[o], r5    \n\t"            \
2235
        /* ah * bl */                                    \
2236
        "uxth r7, %[b]    \n\t"            \
2237
        "muls r6, r7, r6    \n\t"            \
2238
        "lsrs r7, r6, #16   \n\t"            \
2239
        "lsls r6, r6, #16   \n\t"            \
2240
        "adds %[l], %[l], r6    \n\t"            \
2241
        "adcs %[h], %[h], r7    \n\t"            \
2242
        "adcs %[o], %[o], r5    \n\t"            \
2243
        "adds %[l], %[l], r6    \n\t"            \
2244
        "adcs %[h], %[h], r7    \n\t"            \
2245
        "adcs %[o], %[o], r5    \n\t"            \
2246
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2247
        : [a] "l" (va), [b] "l" (vb)                     \
2248
        : "r5", "r6", "r7", "cc"                         \
2249
    )
2250
#else
2251
/* Multiply va by vb and add double size result twice into: vo | vh | vl
2252
 * Assumes first add will not overflow vh | vl
2253
 *
 * Variant selected when DEBUG is defined; it avoids r7.
 * va is saved in r8 and the register holding it is borrowed as the zero
 * operand for the carry adds; it is reloaded from r8 before the high half of
 * va is needed and again as the last instruction. r5 and r6 hold operand
 * halves and partial products.
 * In line with the assumption above, no carry is propagated into vo for the
 * al*bl product, and for al*bh only the second of the two adds carries into
 * vo.
 * NOTE(review): %[a] is declared as an input-only operand but is written
 * inside the asm; its value is restored before the asm ends - confirm this
 * is acceptable for the target compiler.
 * Clobbers r5, r6, r8 and the condition flags.
 */
2254
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
2255
    __asm__ __volatile__ (                               \
2256
        "movs r8, %[a]    \n\t"            \
2257
        /* al * bl */                                    \
2258
        "uxth r5, %[a]    \n\t"            \
2259
        "uxth r6, %[b]    \n\t"            \
2260
        "muls r6, r5, r6    \n\t"            \
2261
        "adds %[l], %[l], r6    \n\t"            \
2262
        "movs %[a], #0    \n\t"            \
2263
        "adcs %[h], %[h], %[a]  \n\t"            \
2264
        "adds %[l], %[l], r6    \n\t"            \
2265
        "adcs %[h], %[h], %[a]  \n\t"            \
2266
        /* al * bh */                                    \
2267
        "lsrs r6, %[b], #16   \n\t"            \
2268
        "muls r5, r6, r5    \n\t"            \
2269
        "lsrs r6, r5, #16   \n\t"            \
2270
        "lsls r5, r5, #16   \n\t"            \
2271
        "adds %[l], %[l], r5    \n\t"            \
2272
        "adcs %[h], %[h], r6    \n\t"            \
2273
        "adds %[l], %[l], r5    \n\t"            \
2274
        "adcs %[h], %[h], r6    \n\t"            \
2275
        "adcs %[o], %[o], %[a]  \n\t"            \
2276
        /* ah * bh */                                    \
2277
        "movs %[a], r8    \n\t"            \
2278
        "lsrs r5, %[a], #16   \n\t"            \
2279
        "lsrs r6, %[b], #16   \n\t"            \
2280
        "muls r6, r5, r6    \n\t"            \
2281
        "movs %[a], #0    \n\t"            \
2282
        "adds %[h], %[h], r6    \n\t"            \
2283
        "adcs %[o], %[o], %[a]  \n\t"            \
2284
        "adds %[h], %[h], r6    \n\t"            \
2285
        "adcs %[o], %[o], %[a]  \n\t"            \
2286
        /* ah * bl */                                    \
2287
        "uxth r6, %[b]    \n\t"            \
2288
        "muls r5, r6, r5    \n\t"            \
2289
        "lsrs r6, r5, #16   \n\t"            \
2290
        "lsls r5, r5, #16   \n\t"            \
2291
        "adds %[l], %[l], r5    \n\t"            \
2292
        "adcs %[h], %[h], r6    \n\t"            \
2293
        "adcs %[o], %[o], %[a]  \n\t"            \
2294
        "adds %[l], %[l], r5    \n\t"            \
2295
        "adcs %[h], %[h], r6    \n\t"            \
2296
        "adcs %[o], %[o], %[a]  \n\t"            \
2297
        "movs %[a], r8    \n\t"            \
2298
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2299
        : [a] "l" (va), [b] "l" (vb)                     \
2300
        : "r5", "r6", "r8", "cc"                         \
2301
    )
2302
#endif
2303
/* Square va and store double size result in: vh | vl
 *
 * va is split into 16-bit halves: ah in r5 and al in r6. Copies of the
 * halves are squared in place in vl (al * al) and vh (ah * ah), then the
 * cross term 2 * al * ah is formed by shifting al * ah left by 17 / right by
 * 15 and added across vh | vl.
 * vl and vh are declared read-write but are overwritten before being read.
 * Clobbers r5, r6 and the condition flags.
 */
2304
#define SP_ASM_SQR(vl, vh, va)                           \
2305
    __asm__ __volatile__ (                               \
2306
        "lsrs r5, %[a], #16   \n\t"            \
2307
        "uxth r6, %[a]    \n\t"            \
2308
        "mov  %[l], r6    \n\t"            \
2309
        "mov  %[h], r5    \n\t"            \
2310
        /* al * al */                                    \
2311
        "muls %[l], %[l], %[l]  \n\t"            \
2312
        /* ah * ah */                                    \
2313
        "muls %[h], %[h], %[h]  \n\t"            \
2314
        /* 2 * al * ah */                                \
2315
        "muls r6, r5, r6    \n\t"            \
2316
        "lsrs r5, r6, #15   \n\t"            \
2317
        "lsls r6, r6, #17   \n\t"            \
2318
        "adds %[l], %[l], r6    \n\t"            \
2319
        "adcs %[h], %[h], r5    \n\t"            \
2320
        : [h] "+l" (vh), [l] "+l" (vl)                   \
2321
        : [a] "l" (va)                                   \
2322
        : "r5", "r6", "cc"                               \
2323
    )
2324
/* Square va and add double size result into: vo | vh | vl
 *
 * va is split into 16-bit halves: ah in r4 and al in r6. al * al is added
 * into vl and ah * ah into vh, with the carry propagated into vo. The halves
 * are then extracted again (the squares overwrote r4 and r6) and the cross
 * term 2 * al * ah, formed by shifting al * ah left by 17 / right by 15, is
 * added across vh | vl with the carry again propagated into vo.
 * r5 is the zero operand for the carry adds into vo.
 * Clobbers r4, r5, r6 and the condition flags.
 */
2325
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
2326
    __asm__ __volatile__ (                               \
2327
        "lsrs r4, %[a], #16   \n\t"            \
2328
        "uxth r6, %[a]    \n\t"            \
2329
        /* al * al */                                    \
2330
        "muls r6, r6, r6    \n\t"            \
2331
        /* ah * ah */                                    \
2332
        "muls r4, r4, r4    \n\t"            \
2333
        "adds %[l], %[l], r6    \n\t"            \
2334
        "adcs %[h], %[h], r4    \n\t"            \
2335
        "movs r5, #0      \n\t"            \
2336
        "adcs %[o], %[o], r5    \n\t"            \
2337
        "lsrs r4, %[a], #16   \n\t"            \
2338
        "uxth r6, %[a]    \n\t"            \
2339
        /* 2 * al * ah */                                \
2340
        "muls r6, r4, r6    \n\t"            \
2341
        "lsrs r4, r6, #15   \n\t"            \
2342
        "lsls r6, r6, #17   \n\t"            \
2343
        "adds %[l], %[l], r6    \n\t"            \
2344
        "adcs %[h], %[h], r4    \n\t"            \
2345
        "adcs %[o], %[o], r5    \n\t"            \
2346
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2347
        : [a] "l" (va)                                   \
2348
        : "r4", "r5", "r6", "cc"                         \
2349
    )
2350
/* Square va and add double size result into: vh | vl
 *
 * va is split into 16-bit halves: ah in r5 and al in r6. al * al is added
 * into vl and ah * ah into vh (with the carry from vl). The halves are then
 * extracted again (the squares overwrote r5 and r6) and the cross term
 * 2 * al * ah, formed by shifting al * ah left by 17 / right by 15, is added
 * across vh | vl.
 * There is no third accumulator word, so a carry out of vh is not captured.
 * Clobbers r5, r6 and the condition flags.
 */
2351
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
2352
    __asm__ __volatile__ (                               \
2353
        "lsrs r5, %[a], #16   \n\t"            \
2354
        "uxth r6, %[a]    \n\t"            \
2355
        /* al * al */                                    \
2356
        "muls r6, r6, r6    \n\t"            \
2357
        /* ah * ah */                                    \
2358
        "muls r5, r5, r5    \n\t"            \
2359
        "adds %[l], %[l], r6    \n\t"            \
2360
        "adcs %[h], %[h], r5    \n\t"            \
2361
        "lsrs r5, %[a], #16   \n\t"            \
2362
        "uxth r6, %[a]    \n\t"            \
2363
        /* 2 * al * ah */                                \
2364
        "muls r6, r5, r6    \n\t"            \
2365
        "lsrs r5, r6, #15   \n\t"            \
2366
        "lsls r6, r6, #17   \n\t"            \
2367
        "adds %[l], %[l], r6    \n\t"            \
2368
        "adcs %[h], %[h], r5    \n\t"            \
2369
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2370
        : [a] "l" (va)                                   \
2371
        : "r5", "r6", "cc"                               \
2372
    )
2373
/* Add va into: vh | vl
 *
 * vl += va, then the carry out of that addition is added into vh.
 * r5 is zeroed between the add and the add-with-carry so that only the carry
 * reaches vh; the sequence therefore depends on that move leaving the carry
 * flag untouched.
 * Clobbers r5 and the condition flags.
 */
2374
#define SP_ASM_ADDC(vl, vh, va)                          \
2375
    __asm__ __volatile__ (                               \
2376
        "adds %[l], %[l], %[a]  \n\t"            \
2377
        "movs r5, #0      \n\t"            \
2378
        "adcs %[h], %[h], r5    \n\t"            \
2379
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2380
        : [a] "l" (va)                                   \
2381
        : "r5", "cc"                                     \
2382
    )
2383
/* Sub va from: vh | vl
 *
 * vl -= va, then the borrow from that subtraction is propagated into vh by a
 * subtract-with-carry of a zeroed r5.
 * r5 is zeroed between the two subtractions, so the sequence depends on that
 * move leaving the carry flag untouched.
 * Clobbers r5 and the condition flags.
 */
2384
#define SP_ASM_SUBB(vl, vh, va)                          \
2385
    __asm__ __volatile__ (                               \
2386
        "subs %[l], %[l], %[a]  \n\t"            \
2387
        "movs r5, #0      \n\t"            \
2388
        "sbcs %[h], %[h], r5    \n\t"            \
2389
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2390
        : [a] "l" (va)                                   \
2391
        : "r5", "cc"                                     \
2392
    )
2393
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word value vc | vb | va is added into vo | vh | vl with the carry
 * rippling from vl through vh to vo, and the same add chain is then executed
 * a second time.
 * A carry out of vo is not captured.
 * No scratch registers are used; only the condition flags are clobbered.
 */
2394
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
2395
    __asm__ __volatile__ (                               \
2396
        "adds %[l], %[l], %[a]  \n\t"            \
2397
        "adcs %[h], %[h], %[b]  \n\t"            \
2398
        "adcs %[o], %[o], %[c]  \n\t"            \
2399
        "adds %[l], %[l], %[a]  \n\t"            \
2400
        "adcs %[h], %[h], %[b]  \n\t"            \
2401
        "adcs %[o], %[o], %[c]  \n\t"            \
2402
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2403
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)       \
2404
        : "cc"                                           \
2405
    )
2406
2407
#elif defined(__GNUC__)
2408
2409
/* Multiply va by vb and store double size result in: vh | vl
 *
 * Both operands are split into 16-bit halves (al/ah and bl/bh) and the four
 * partial products are combined:
 *   al * bl  -> vl
 *   al * bh  -> split by shifts of 16 and added across vh | vl
 *   ah * bh  -> added into vh
 *   ah * bl  -> split by shifts of 16 and added across vh | vl
 * vl and vh are declared read-write but are overwritten before being read.
 * The mnemonics carry no 's' suffix (mul, lsr, lsl, add, adc, mov), yet each
 * add is followed by an adc that consumes its carry.
 * NOTE(review): presumably assembled in a mode where these forms update the
 * flags - confirm for the target assembler.
 * Clobbers r4, r5 (used as zero for the carry add), r6 and the condition
 * flags.
 */
2410
#define SP_ASM_MUL(vl, vh, va, vb)                       \
2411
    __asm__ __volatile__ (                               \
2412
        /* al * bl */                                    \
2413
        "uxth r6, %[a]    \n\t"            \
2414
        "uxth %[l], %[b]    \n\t"            \
2415
        "mul  %[l], r6    \n\t"            \
2416
        /* al * bh */                                    \
2417
        "lsr  r4, %[b], #16   \n\t"            \
2418
        "mul  r6, r4      \n\t"            \
2419
        "lsr  %[h], r6, #16   \n\t"            \
2420
        "lsl  r6, r6, #16   \n\t"            \
2421
        "add  %[l], %[l], r6    \n\t"            \
2422
        "mov  r5, #0      \n\t"            \
2423
        "adc  %[h], r5    \n\t"            \
2424
        /* ah * bh */                                    \
2425
        "lsr  r6, %[a], #16   \n\t"            \
2426
        "mul  r4, r6      \n\t"            \
2427
        "add  %[h], %[h], r4    \n\t"            \
2428
        /* ah * bl */                                    \
2429
        "uxth r4, %[b]    \n\t"            \
2430
        "mul  r6, r4      \n\t"            \
2431
        "lsr  r4, r6, #16   \n\t"            \
2432
        "lsl  r6, r6, #16   \n\t"            \
2433
        "add  %[l], %[l], r6    \n\t"            \
2434
        "adc  %[h], r4    \n\t"            \
2435
        : [h] "+l" (vh), [l] "+l" (vl)                   \
2436
        : [a] "l" (va), [b] "l" (vb)                     \
2437
        : "r4", "r5", "r6", "cc"                         \
2438
    )
2439
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The product of va and vb is built from four 16x16 partial products and
 * written to vh | vl; vo is set to zero. After being zeroed, vo also serves
 * as the zero operand of the add-with-carry into vh.
 * Previous contents of vl, vh and vo are overwritten before being read.
 * The mnemonics carry no 's' suffix, yet each add is followed by an adc that
 * consumes its carry.
 * NOTE(review): presumably assembled in a mode where these forms update the
 * flags - confirm for the target assembler.
 * Clobbers r5, r6 and the condition flags.
 */
2440
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
2441
    __asm__ __volatile__ (                               \
2442
        /* al * bl */                                    \
2443
        "uxth r6, %[a]    \n\t"            \
2444
        "uxth %[l], %[b]    \n\t"            \
2445
        "mul  %[l], r6    \n\t"            \
2446
        /* al * bh */                                    \
2447
        "lsr  r5, %[b], #16   \n\t"            \
2448
        "mul  r6, r5      \n\t"            \
2449
        "lsr  %[h], r6, #16   \n\t"            \
2450
        "lsl  r6, r6, #16   \n\t"            \
2451
        "add  %[l], %[l], r6    \n\t"            \
2452
        "mov  %[o], #0    \n\t"            \
2453
        "adc  %[h], %[o]    \n\t"            \
2454
        /* ah * bh */                                    \
2455
        "lsr  r6, %[a], #16   \n\t"            \
2456
        "mul  r5, r6      \n\t"            \
2457
        "add  %[h], %[h], r5    \n\t"            \
2458
        /* ah * bl */                                    \
2459
        "uxth r5, %[b]    \n\t"            \
2460
        "mul  r6, r5      \n\t"            \
2461
        "lsr  r5, r6, #16   \n\t"            \
2462
        "lsl  r6, r6, #16   \n\t"            \
2463
        "add  %[l], %[l], r6    \n\t"            \
2464
        "adc  %[h], r5    \n\t"            \
2465
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2466
        : [a] "l" (va), [b] "l" (vb)                     \
2467
        : "r5", "r6", "cc"                               \
2468
    )
2469
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
2470
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant selected when neither WOLFSSL_SP_SMALL nor DEBUG is defined.
 * Each of the four 16x16 partial products (al*bl, al*bh, ah*bh, ah*bl) is
 * added into the accumulator as soon as it is computed, with the carry
 * propagated up through vh into vo after every addition.
 * r5 is zeroed once and kept as the zero operand for the carry adds; r6 and
 * r7 hold operand halves and partial products.
 * The mnemonics carry no 's' suffix, yet each add is followed by adc
 * instructions that consume its carry.
 * NOTE(review): presumably assembled in a mode where these forms update the
 * flags - confirm for the target assembler.
 * Clobbers r5, r6, r7 and the condition flags.
 */
2471
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
2472
    __asm__ __volatile__ (                               \
2473
        /* al * bl */                                    \
2474
        "uxth r6, %[a]    \n\t"            \
2475
        "uxth r7, %[b]    \n\t"            \
2476
        "mul  r7, r6      \n\t"            \
2477
        "add  %[l], %[l], r7    \n\t"            \
2478
        "mov  r5, #0      \n\t"            \
2479
        "adc  %[h], r5    \n\t"            \
2480
        "adc  %[o], r5    \n\t"            \
2481
        /* al * bh */                                    \
2482
        "lsr  r7, %[b], #16   \n\t"            \
2483
        "mul  r6, r7      \n\t"            \
2484
        "lsr  r7, r6, #16   \n\t"            \
2485
        "lsl  r6, r6, #16   \n\t"            \
2486
        "add  %[l], %[l], r6    \n\t"            \
2487
        "adc  %[h], r7    \n\t"            \
2488
        "adc  %[o], r5    \n\t"            \
2489
        /* ah * bh */                                    \
2490
        "lsr  r6, %[a], #16   \n\t"            \
2491
        "lsr  r7, %[b], #16   \n\t"            \
2492
        "mul  r7, r6      \n\t"            \
2493
        "add  %[h], %[h], r7    \n\t"            \
2494
        "adc  %[o], r5    \n\t"            \
2495
        /* ah * bl */                                    \
2496
        "uxth r7, %[b]    \n\t"            \
2497
        "mul  r6, r7      \n\t"            \
2498
        "lsr  r7, r6, #16   \n\t"            \
2499
        "lsl  r6, r6, #16   \n\t"            \
2500
        "add  %[l], %[l], r6    \n\t"            \
2501
        "adc  %[h], r7    \n\t"            \
2502
        "adc  %[o], r5    \n\t"            \
2503
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2504
        : [a] "l" (va), [b] "l" (vb)                     \
2505
        : "r5", "r6", "r7", "cc"                         \
2506
    )
2507
#else
2508
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant compiled when WOLFSSL_SP_SMALL or DEBUG is defined. It performs
 * the same four 16x16-bit partial product additions as the other variant
 * but uses only r5 and r6 as scratch (no r7). Because r5 doubles as a
 * product register, it is reloaded with 0 before each carry-only adc into
 * vo; those reloads sit between two adc instructions and so rely on
 * mov-immediate leaving the carry flag untouched.
 *
 * vl, vh, vo, va and vb are all constrained to "l" registers.
 * Clobbers: r5, r6 and the condition flags.
 */
2509
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
2510
    __asm__ __volatile__ (                               \
2511
        /* al * bl - added to vl, carry rippled into vh and vo */ \
2512
        "uxth   r6, %[a]                \n\t"            \
2513
        "uxth   r5, %[b]                \n\t"            \
2514
        "mul    r5, r6                  \n\t"            \
2515
        "add    %[l], %[l], r5          \n\t"            \
2516
        "mov    r5, #0                  \n\t"            \
2517
        "adc    %[h], r5                \n\t"            \
2518
        "adc    %[o], r5                \n\t"            \
2519
        /* al * bh - r6 still holds al; product split at bit 16 */ \
2520
        "lsr    r5, %[b], #16           \n\t"            \
2521
        "mul    r6, r5                  \n\t"            \
2522
        "lsr    r5, r6, #16             \n\t"            \
2523
        "lsl    r6, r6, #16             \n\t"            \
2524
        "add    %[l], %[l], r6          \n\t"            \
2525
        "adc    %[h], r5                \n\t"            \
2526
        "mov    r5, #0                  \n\t"            \
2527
        "adc    %[o], r5                \n\t"            \
2528
        /* ah * bh - added to vh, carry into vo */       \
2529
        "lsr    r6, %[a], #16           \n\t"            \
2530
        "lsr    r5, %[b], #16           \n\t"            \
2531
        "mul    r5, r6                  \n\t"            \
2532
        "add    %[h], %[h], r5          \n\t"            \
2533
        "mov    r5, #0                  \n\t"            \
2534
        "adc    %[o], r5                \n\t"            \
2535
        /* ah * bl - r6 still holds ah; product split at bit 16 */ \
2536
        "uxth   r5, %[b]                \n\t"            \
2537
        "mul    r6, r5                  \n\t"            \
2538
        "lsr    r5, r6, #16             \n\t"            \
2539
        "lsl    r6, r6, #16             \n\t"            \
2540
        "add    %[l], %[l], r6          \n\t"            \
2541
        "adc    %[h], r5                \n\t"            \
2542
        "mov    r5, #0                  \n\t"            \
2543
        "adc    %[o], r5                \n\t"            \
2544
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2545
        : [a] "l" (va), [b] "l" (vb)                     \
2546
        : "r5", "r6", "cc"                               \
2547
    )
2548
#endif
2549
/* Multiply va by vb and add double size result into: vh | vl
 *
 * Two-word accumulator form: there is no third word, so any carry out of
 * vh is discarded. The product is built from four 16x16-bit partial
 * products. r5 is set to 0 and used as the zero addend for the carry-only
 * adc after the first addition.
 *
 * vl, vh, va and vb are all constrained to "l" registers.
 * Clobbers: r4, r5, r6 and the condition flags.
 */
2550
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
2551
    __asm__ __volatile__ (                               \
2552
        /* al * bl - added to vl, carry into vh */       \
2553
        "uxth r6, %[a]    \n\t"            \
2554
        "uxth r4, %[b]    \n\t"            \
2555
        "mul  r4, r6      \n\t"            \
2556
        "add  %[l], %[l], r4    \n\t"            \
2557
        "mov  r5, #0      \n\t"            \
2558
        "adc  %[h], r5    \n\t"            \
2559
        /* al * bh - r6 still holds al; product split at bit 16 */ \
2560
        "lsr  r4, %[b], #16   \n\t"            \
2561
        "mul  r6, r4      \n\t"            \
2562
        "lsr  r4, r6, #16   \n\t"            \
2563
        "lsl  r6, r6, #16   \n\t"            \
2564
        "add  %[l], %[l], r6    \n\t"            \
2565
        "adc  %[h], r4    \n\t"            \
2566
        /* ah * bh - added to vh only */                 \
2567
        "lsr  r6, %[a], #16   \n\t"            \
2568
        "lsr  r4, %[b], #16   \n\t"            \
2569
        "mul  r4, r6      \n\t"            \
2570
        "add  %[h], %[h], r4    \n\t"            \
2571
        /* ah * bl - r6 still holds ah; product split at bit 16 */ \
2572
        "uxth r4, %[b]    \n\t"            \
2573
        "mul  r6, r4      \n\t"            \
2574
        "lsr  r4, r6, #16   \n\t"            \
2575
        "lsl  r6, r6, #16   \n\t"            \
2576
        "add  %[l], %[l], r6    \n\t"            \
2577
        "adc  %[h], r4    \n\t"            \
2578
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2579
        : [a] "l" (va), [b] "l" (vb)                     \
2580
        : "r4", "r5", "r6", "cc"                         \
2581
    )
2582
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
2583
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant compiled when neither WOLFSSL_SP_SMALL nor DEBUG is defined.
 * Each of the four 16x16-bit partial products is computed once and then
 * added into the accumulator two times in a row, with the carry propagated
 * into vo after every addition. r5 is set to 0 once and used as the zero
 * addend for every carry-only adc.
 *
 * vl, vh, vo, va and vb are all constrained to "l" registers.
 * Clobbers: r5, r6, r7 and the condition flags.
 */
2584
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
2585
    __asm__ __volatile__ (                               \
2586
        /* al * bl - added twice to vl, carries into vh and vo */ \
2587
        "uxth r6, %[a]    \n\t"            \
2588
        "uxth r7, %[b]    \n\t"            \
2589
        "mul  r7, r6      \n\t"            \
2590
        "add  %[l], %[l], r7    \n\t"            \
2591
        "mov  r5, #0      \n\t"            \
2592
        "adc  %[h], r5    \n\t"            \
2593
        "adc  %[o], r5    \n\t"            \
2594
        "add  %[l], %[l], r7    \n\t"            \
2595
        "adc  %[h], r5    \n\t"            \
2596
        "adc  %[o], r5    \n\t"            \
2597
        /* al * bh - r6 still holds al; split at bit 16, added twice */ \
2598
        "lsr  r7, %[b], #16   \n\t"            \
2599
        "mul  r6, r7      \n\t"            \
2600
        "lsr  r7, r6, #16   \n\t"            \
2601
        "lsl  r6, r6, #16   \n\t"            \
2602
        "add  %[l], %[l], r6    \n\t"            \
2603
        "adc  %[h], r7    \n\t"            \
2604
        "adc  %[o], r5    \n\t"            \
2605
        "add  %[l], %[l], r6    \n\t"            \
2606
        "adc  %[h], r7    \n\t"            \
2607
        "adc  %[o], r5    \n\t"            \
2608
        /* ah * bh - added twice to vh, carries into vo */ \
2609
        "lsr  r6, %[a], #16   \n\t"            \
2610
        "lsr  r7, %[b], #16   \n\t"            \
2611
        "mul  r7, r6      \n\t"            \
2612
        "add  %[h], %[h], r7    \n\t"            \
2613
        "adc  %[o], r5    \n\t"            \
2614
        "add  %[h], %[h], r7    \n\t"            \
2615
        "adc  %[o], r5    \n\t"            \
2616
        /* ah * bl - r6 still holds ah; split at bit 16, added twice */ \
2617
        "uxth r7, %[b]    \n\t"            \
2618
        "mul  r6, r7      \n\t"            \
2619
        "lsr  r7, r6, #16   \n\t"            \
2620
        "lsl  r6, r6, #16   \n\t"            \
2621
        "add  %[l], %[l], r6    \n\t"            \
2622
        "adc  %[h], r7    \n\t"            \
2623
        "adc  %[o], r5    \n\t"            \
2624
        "add  %[l], %[l], r6    \n\t"            \
2625
        "adc  %[h], r7    \n\t"            \
2626
        "adc  %[o], r5    \n\t"            \
2627
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2628
        : [a] "l" (va), [b] "l" (vb)                     \
2629
        : "r5", "r6", "r7", "cc"                         \
2630
    )
2631
#else
2632
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant compiled when WOLFSSL_SP_SMALL or DEBUG is defined. It performs
 * the same doubled partial product additions as the other variant but
 * does not use r7. Instead the register holding va is saved in r8,
 * temporarily overwritten with 0 so it can act as the zero addend for the
 * carry-only adc instructions, reloaded from r8 when ah is needed, and
 * restored from r8 as the last instruction.
 *
 * NOTE(review): %[a] is declared as an input-only operand yet is written
 * inside the asm. The value is restored before the statement ends, but if
 * the compiler ever placed va and vb in the same register the zeroing
 * would also destroy %[b] - confirm callers never pass the same value for
 * both operands.
 *
 * vl, vh, vo, va and vb are all constrained to "l" registers.
 * Clobbers: r5, r6, r8 and the condition flags.
 */
2633
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
2634
    __asm__ __volatile__ (                               \
2635
        /* Save va so that its register can be reused as zero. */ \
        "mov    r8, %[a]                \n\t"            \
2636
        /* al * bl - added twice to vl, carries into vh and vo */ \
2637
        "uxth   r6, %[a]                \n\t"            \
2638
        "uxth   r5, %[b]                \n\t"            \
2639
        "mul    r5, r6                  \n\t"            \
2640
        "add    %[l], %[l], r5          \n\t"            \
2641
        "mov    %[a], #0                \n\t"            \
2642
        "adc    %[h], %[a]              \n\t"            \
2643
        "adc    %[o], %[a]              \n\t"            \
2644
        "add    %[l], %[l], r5          \n\t"            \
2645
        "adc    %[h], %[a]              \n\t"            \
2646
        "adc    %[o], %[a]              \n\t"            \
2647
        /* al * bh - r6 still holds al; split at bit 16, added twice */ \
2648
        "lsr    r5, %[b], #16           \n\t"            \
2649
        "mul    r6, r5                  \n\t"            \
2650
        "lsr    r5, r6, #16             \n\t"            \
2651
        "lsl    r6, r6, #16             \n\t"            \
2652
        "add    %[l], %[l], r6          \n\t"            \
2653
        "adc    %[h], r5                \n\t"            \
2654
        "adc    %[o], %[a]              \n\t"            \
2655
        "add    %[l], %[l], r6          \n\t"            \
2656
        "adc    %[h], r5                \n\t"            \
2657
        "adc    %[o], %[a]              \n\t"            \
2658
        /* ah * bh - va reloaded from r8 to extract ah, then zeroed again */ \
2659
        "mov    %[a], r8                \n\t"            \
2660
        "lsr    r6, %[a], #16           \n\t"            \
2661
        "lsr    r5, %[b], #16           \n\t"            \
2662
        "mul    r5, r6                  \n\t"            \
2663
        "add    %[h], %[h], r5          \n\t"            \
2664
        "mov    %[a], #0                \n\t"            \
2665
        "adc    %[o], %[a]              \n\t"            \
2666
        "add    %[h], %[h], r5          \n\t"            \
2667
        "adc    %[o], %[a]              \n\t"            \
2668
        /* ah * bl - r6 still holds ah; split at bit 16, added twice */ \
2669
        "uxth   r5, %[b]                \n\t"            \
2670
        "mul    r6, r5                  \n\t"            \
2671
        "lsr    r5, r6, #16             \n\t"            \
2672
        "lsl    r6, r6, #16             \n\t"            \
2673
        "add    %[l], %[l], r6          \n\t"            \
2674
        "adc    %[h], r5                \n\t"            \
2675
        "adc    %[o], %[a]              \n\t"            \
2676
        "add    %[l], %[l], r6          \n\t"            \
2677
        "adc    %[h], r5                \n\t"            \
2678
        "adc    %[o], %[a]              \n\t"            \
2679
        /* Restore va for the surrounding code. */       \
        "mov    %[a], r8                \n\t"            \
2680
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2681
        : [a] "l" (va), [b] "l" (vb)                     \
2682
        : "r5", "r6", "r8", "cc"                         \
2683
    )
2684
#endif
2685
#ifndef DEBUG
2686
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Variant compiled when DEBUG is not defined. It differs from
 * SP_ASM_MUL_ADD2 only in which carries reach vo: the carry out of vh is
 * not added into vo for either al*bl addition nor for the first al*bh
 * addition. All later additions propagate the carry into vo.
 * r5 is set to 0 once and used as the zero addend for carry-only adc.
 *
 * vl, vh, vo, va and vb are all constrained to "l" registers.
 * Clobbers: r5, r6, r7 and the condition flags.
 */
2689
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
2690
    __asm__ __volatile__ (                               \
2691
        /* al * bl - added twice; carry goes into vh only */ \
2692
        "uxth r6, %[a]    \n\t"            \
2693
        "uxth r7, %[b]    \n\t"            \
2694
        "mul  r7, r6      \n\t"            \
2695
        "add  %[l], %[l], r7    \n\t"            \
2696
        "mov  r5, #0      \n\t"            \
2697
        "adc  %[h], r5    \n\t"            \
2698
        "add  %[l], %[l], r7    \n\t"            \
2699
        "adc  %[h], r5    \n\t"            \
2700
        /* al * bh - only the second addition carries into vo */ \
2701
        "lsr  r7, %[b], #16   \n\t"            \
2702
        "mul  r6, r7      \n\t"            \
2703
        "lsr  r7, r6, #16   \n\t"            \
2704
        "lsl  r6, r6, #16   \n\t"            \
2705
        "add  %[l], %[l], r6    \n\t"            \
2706
        "adc  %[h], r7    \n\t"            \
2707
        "add  %[l], %[l], r6    \n\t"            \
2708
        "adc  %[h], r7    \n\t"            \
2709
        "adc  %[o], r5    \n\t"            \
2710
        /* ah * bh - added twice to vh, carries into vo */ \
2711
        "lsr  r6, %[a], #16   \n\t"            \
2712
        "lsr  r7, %[b], #16   \n\t"            \
2713
        "mul  r7, r6      \n\t"            \
2714
        "add  %[h], %[h], r7    \n\t"            \
2715
        "adc  %[o], r5    \n\t"            \
2716
        "add  %[h], %[h], r7    \n\t"            \
2717
        "adc  %[o], r5    \n\t"            \
2718
        /* ah * bl - r6 still holds ah; split at bit 16, added twice */ \
2719
        "uxth r7, %[b]    \n\t"            \
2720
        "mul  r6, r7      \n\t"            \
2721
        "lsr  r7, r6, #16   \n\t"            \
2722
        "lsl  r6, r6, #16   \n\t"            \
2723
        "add  %[l], %[l], r6    \n\t"            \
2724
        "adc  %[h], r7    \n\t"            \
2725
        "adc  %[o], r5    \n\t"            \
2726
        "add  %[l], %[l], r6    \n\t"            \
2727
        "adc  %[h], r7    \n\t"            \
2728
        "adc  %[o], r5    \n\t"            \
2729
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2730
        : [a] "l" (va), [b] "l" (vb)                     \
2731
        : "r5", "r6", "r7", "cc"                         \
2732
    )
2733
#else
2734
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Variant compiled when DEBUG is defined. It skips the same carries into
 * vo as the non-DEBUG variant (both al*bl additions and the first al*bh
 * addition) but does not use r7. Instead the register holding va is saved
 * in r8, temporarily overwritten with 0 to act as the zero addend for the
 * carry-only adc instructions, reloaded from r8 when ah is needed, and
 * restored from r8 as the last instruction.
 *
 * NOTE(review): %[a] is declared as an input-only operand yet is written
 * inside the asm. The value is restored before the statement ends, but if
 * the compiler ever placed va and vb in the same register the zeroing
 * would also destroy %[b] - confirm callers never pass the same value for
 * both operands.
 *
 * vl, vh, vo, va and vb are all constrained to "l" registers.
 * Clobbers: r5, r6, r8 and the condition flags.
 */
2737
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
2738
    __asm__ __volatile__ (                               \
2739
        /* Save va so that its register can be reused as zero. */ \
        "mov  r8, %[a]    \n\t"            \
2740
        /* al * bl - added twice; carry goes into vh only */ \
2741
        "uxth r5, %[a]    \n\t"            \
2742
        "uxth r6, %[b]    \n\t"            \
2743
        "mul  r6, r5      \n\t"            \
2744
        "add  %[l], %[l], r6    \n\t"            \
2745
        "mov  %[a], #0    \n\t"            \
2746
        "adc  %[h], %[a]    \n\t"            \
2747
        "add  %[l], %[l], r6    \n\t"            \
2748
        "adc  %[h], %[a]    \n\t"            \
2749
        /* al * bh - only the second addition carries into vo */ \
2750
        "lsr  r6, %[b], #16   \n\t"            \
2751
        "mul  r5, r6      \n\t"            \
2752
        "lsr  r6, r5, #16   \n\t"            \
2753
        "lsl  r5, r5, #16   \n\t"            \
2754
        "add  %[l], %[l], r5    \n\t"            \
2755
        "adc  %[h], r6    \n\t"            \
2756
        "add  %[l], %[l], r5    \n\t"            \
2757
        "adc  %[h], r6    \n\t"            \
2758
        "adc  %[o], %[a]    \n\t"            \
2759
        /* ah * bh - va reloaded from r8 to extract ah, then zeroed again */ \
2760
        "mov    %[a], r8                \n\t"            \
2761
        "lsr  r5, %[a], #16   \n\t"            \
2762
        "lsr  r6, %[b], #16   \n\t"            \
2763
        "mul  r6, r5      \n\t"            \
2764
        "mov    %[a], #0                \n\t"            \
2765
        "add  %[h], %[h], r6    \n\t"            \
2766
        "adc  %[o], %[a]    \n\t"            \
2767
        "add  %[h], %[h], r6    \n\t"            \
2768
        "adc  %[o], %[a]    \n\t"            \
2769
        /* ah * bl - r5 still holds ah; split at bit 16, added twice */ \
2770
        "uxth r6, %[b]    \n\t"            \
2771
        "mul  r5, r6      \n\t"            \
2772
        "lsr  r6, r5, #16   \n\t"            \
2773
        "lsl  r5, r5, #16   \n\t"            \
2774
        "add  %[l], %[l], r5    \n\t"            \
2775
        "adc  %[h], r6    \n\t"            \
2776
        "adc  %[o], %[a]    \n\t"            \
2777
        "add  %[l], %[l], r5    \n\t"            \
2778
        "adc  %[h], r6    \n\t"            \
2779
        "adc  %[o], %[a]    \n\t"            \
2780
        /* Restore va for the surrounding code. */       \
        "mov    %[a], r8                \n\t"            \
2781
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2782
        : [a] "l" (va), [b] "l" (vb)                     \
2783
        : "r5", "r6", "r8", "cc"                         \
2784
    )
2785
#endif
2786
/* Square va and store double size result in: vh | vl
 *
 * va is split into a low 16-bit half al (uxth) and a high half ah
 * (lsr #16). vl is set to al*al, vh to ah*ah, and the cross product al*ah
 * is then added in doubled: shifting it left by 17 / right by 15 instead
 * of 16 / 16 folds the multiplication by two into the split.
 * The incoming values of vl and vh are overwritten without being read.
 *
 * vl, vh and va are all constrained to "l" registers.
 * Clobbers: r5, r6 and the condition flags.
 */
2787
#define SP_ASM_SQR(vl, vh, va)                           \
2788
    __asm__ __volatile__ (                               \
2789
        "lsr  r5, %[a], #16   \n\t"            \
2790
        "uxth r6, %[a]    \n\t"            \
2791
        "mov  %[l], r6    \n\t"            \
2792
        "mov  %[h], r5    \n\t"            \
2793
        /* al * al */                                    \
2794
        "mul  %[l], %[l]    \n\t"            \
2795
        /* ah * ah */                                    \
2796
        "mul  %[h], %[h]    \n\t"            \
2797
        /* 2 * al * ah - r5/r6 still hold ah/al */       \
2798
        "mul  r6, r5      \n\t"            \
2799
        "lsr  r5, r6, #15   \n\t"            \
2800
        "lsl  r6, r6, #17   \n\t"            \
2801
        "add  %[l], %[l], r6    \n\t"            \
2802
        "adc  %[h], r5    \n\t"            \
2803
        : [h] "+l" (vh), [l] "+l" (vl)                   \
2804
        : [a] "l" (va)                                   \
2805
        : "r5", "r6", "cc"                               \
2806
    )
2807
/* Square va and add double size result into: vo | vh | vl
 *
 * va is split into a low 16-bit half al (uxth) and a high half ah
 * (lsr #16). al*al is added to vl and ah*ah to vh with the carry going
 * into vo. The halves are then extracted again (the squaring overwrote
 * them) and the cross product al*ah is added in doubled, using a 17 / 15
 * bit split to fold in the multiplication by two, again carrying into vo.
 * r5 is set to 0 and used as the zero addend for the carry-only adc.
 *
 * vl, vh, vo and va are all constrained to "l" registers.
 * Clobbers: r4, r5, r6 and the condition flags.
 */
2808
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
2809
    __asm__ __volatile__ (                               \
2810
        "lsr  r4, %[a], #16   \n\t"            \
2811
        "uxth r6, %[a]    \n\t"            \
2812
        /* al * al */                                    \
2813
        "mul  r6, r6      \n\t"            \
2814
        /* ah * ah */                                    \
2815
        "mul  r4, r4      \n\t"            \
2816
        "add  %[l], %[l], r6    \n\t"            \
2817
        "adc  %[h], r4    \n\t"            \
2818
        "mov  r5, #0      \n\t"            \
2819
        "adc  %[o], r5    \n\t"            \
2820
        "lsr  r4, %[a], #16   \n\t"            \
2821
        "uxth r6, %[a]    \n\t"            \
2822
        /* 2 * al * ah */                                \
2823
        "mul  r6, r4      \n\t"            \
2824
        "lsr  r4, r6, #15   \n\t"            \
2825
        "lsl  r6, r6, #17   \n\t"            \
2826
        "add  %[l], %[l], r6    \n\t"            \
2827
        "adc  %[h], r4    \n\t"            \
2828
        "adc  %[o], r5    \n\t"            \
2829
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2830
        : [a] "l" (va)                                   \
2831
        : "r4", "r5", "r6", "cc"                         \
2832
    )
2833
/* Square va and add double size result into: vh | vl
 *
 * Two-word accumulator form: there is no third word, so any carry out of
 * vh is discarded. al*al is added to vl and ah*ah to vh; the halves are
 * then extracted again and the cross product al*ah is added in doubled,
 * using a 17 / 15 bit split to fold in the multiplication by two.
 *
 * vl, vh and va are all constrained to "l" registers.
 * Clobbers: r5, r6 and the condition flags.
 */
2834
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
2835
    __asm__ __volatile__ (                               \
2836
        "lsr  r5, %[a], #16   \n\t"            \
2837
        "uxth r6, %[a]    \n\t"            \
2838
        /* al * al */                                    \
2839
        "mul  r6, r6      \n\t"            \
2840
        /* ah * ah */                                    \
2841
        "mul  r5, r5      \n\t"            \
2842
        "add  %[l], %[l], r6    \n\t"            \
2843
        "adc  %[h], r5    \n\t"            \
2844
        "lsr  r5, %[a], #16   \n\t"            \
2845
        "uxth r6, %[a]    \n\t"            \
2846
        /* 2 * al * ah */                                \
2847
        "mul  r6, r5      \n\t"            \
2848
        "lsr  r5, r6, #15   \n\t"            \
2849
        "lsl  r6, r6, #17   \n\t"            \
2850
        "add  %[l], %[l], r6    \n\t"            \
2851
        "adc  %[h], r5    \n\t"            \
2852
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2853
        : [a] "l" (va)                                   \
2854
        : "r5", "r6", "cc"                               \
2855
    )
2856
/* Add va into: vh | vl
 *
 * va is added to vl and the resulting carry is added to vh (r5 is set to
 * 0 to serve as the zero addend of the adc). Any carry out of vh is
 * discarded.
 *
 * vl, vh and va are all constrained to "l" registers.
 * Clobbers: r5 and the condition flags.
 */
2857
#define SP_ASM_ADDC(vl, vh, va)                          \
2858
    __asm__ __volatile__ (                               \
2859
        "add  %[l], %[l], %[a]  \n\t"            \
2860
        "mov  r5, #0      \n\t"            \
2861
        "adc  %[h], r5    \n\t"            \
2862
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2863
        : [a] "l" (va)                                   \
2864
        : "r5", "cc"                                     \
2865
    )
2866
/* Sub va from: vh | vl
 *
 * va is subtracted from vl and the resulting borrow is subtracted from vh
 * (r5 is set to 0 to serve as the zero operand of the sbc). Any borrow out
 * of vh is discarded.
 *
 * vl, vh and va are all constrained to "l" registers.
 * Clobbers: r5 and the condition flags.
 */
2867
#define SP_ASM_SUBB(vl, vh, va)                          \
2868
    __asm__ __volatile__ (                               \
2869
        "sub  %[l], %[l], %[a]  \n\t"            \
2870
        "mov  r5, #0      \n\t"            \
2871
        "sbc  %[h], r5    \n\t"            \
2872
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2873
        : [a] "l" (va)                                   \
2874
        : "r5", "cc"                                     \
2875
    )
2876
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word value vc | vb | va is added to the three-word accumulator
 * vo | vh | vl with carry propagation (add, adc, adc), and then the same
 * addition is performed a second time. Any carry out of vo is discarded.
 *
 * All six operands are constrained to "l" registers.
 * Clobbers: the condition flags only.
 */
2877
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
2878
    __asm__ __volatile__ (                               \
2879
        /* First addition of vc | vb | va */             \
        "add  %[l], %[l], %[a]  \n\t"            \
2880
        "adc  %[h], %[b]    \n\t"            \
2881
        "adc  %[o], %[c]    \n\t"            \
2882
        /* Second addition of vc | vb | va */            \
        "add  %[l], %[l], %[a]  \n\t"            \
2883
        "adc  %[h], %[b]    \n\t"            \
2884
        "adc  %[o], %[c]    \n\t"            \
2885
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2886
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)       \
2887
        : "cc"                                           \
2888
    )
2889
2890
#endif
2891
2892
#ifdef WOLFSSL_SP_DIV_WORD_HALF
2893
/* Divide a two digit number by a digit number and return. (hi | lo) / d
2894
 *
2895
 * No division instruction used - does operation bit by bit.
2896
 * Constant time.
2897
 *
2898
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
2899
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
2900
 * @param  [in]  d   SP integer digit. Number to divide by.
2901
 * @return  The division result.
2902
 */
2903
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
2904
                                          sp_int_digit d)
2905
{
2906
    __asm__ __volatile__ (
2907
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2908
        "lsrs r3, %[d], #24\n\t"
2909
#else
2910
        "lsr  r3, %[d], #24\n\t"
2911
#endif
2912
        "beq  2%=f\n\t"
2913
  "\n1%=:\n\t"
2914
        "movs r3, #0\n\t"
2915
        "b  3%=f\n\t"
2916
  "\n2%=:\n\t"
2917
        "mov  r3, #8\n\t"
2918
  "\n3%=:\n\t"
2919
        "movs r4, #31\n\t"
2920
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2921
        "subs r4, r4, r3\n\t"
2922
#else
2923
        "sub  r4, r4, r3\n\t"
2924
#endif
2925
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2926
        "lsls %[d], %[d], r3\n\t"
2927
#else
2928
        "lsl  %[d], %[d], r3\n\t"
2929
#endif
2930
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2931
        "lsls %[hi], %[hi], r3\n\t"
2932
#else
2933
        "lsl  %[hi], %[hi], r3\n\t"
2934
#endif
2935
        "mov  r5, %[lo]\n\t"
2936
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2937
        "lsrs r5, r5, r4\n\t"
2938
#else
2939
        "lsr  r5, r5, r4\n\t"
2940
#endif
2941
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2942
        "lsls %[lo], %[lo], r3\n\t"
2943
#else
2944
        "lsl  %[lo], %[lo], r3\n\t"
2945
#endif
2946
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2947
        "lsrs r5, r5, #1\n\t"
2948
#else
2949
        "lsr  r5, r5, #1\n\t"
2950
#endif
2951
#if defined(WOLFSSL_KEIL)
2952
        "orrs %[hi], %[hi], r5\n\t"
2953
#elif defined(__clang__)
2954
        "orrs %[hi], r5\n\t"
2955
#else
2956
        "orr  %[hi], r5\n\t"
2957
#endif
2958
2959
        "movs   r3, #0\n\t"
2960
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2961
        "lsrs   r5, %[d], #1\n\t"
2962
#else
2963
        "lsr    r5, %[d], #1\n\t"
2964
#endif
2965
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2966
        "adds   r5, r5, #1\n\t"
2967
#else
2968
        "add    r5, r5, #1\n\t"
2969
#endif
2970
        "mov    r8, %[lo]\n\t"
2971
        "mov    r9, %[hi]\n\t"
2972
        /* Do top 32 */
2973
        "movs   r6, r5\n\t"
2974
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2975
        "subs   r6, r6, %[hi]\n\t"
2976
#else
2977
        "sub    r6, r6, %[hi]\n\t"
2978
#endif
2979
#ifdef WOLFSSL_KEIL
2980
        "sbcs   r6, r6, r6\n\t"
2981
#elif defined(__clang__)
2982
        "sbcs   r6, r6\n\t"
2983
#else
2984
        "sbc    r6, r6\n\t"
2985
#endif
2986
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2987
        "adds   r3, r3, r3\n\t"
2988
#else
2989
        "add    r3, r3, r3\n\t"
2990
#endif
2991
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2992
        "subs   r3, r3, r6\n\t"
2993
#else
2994
        "sub    r3, r3, r6\n\t"
2995
#endif
2996
#ifdef WOLFSSL_KEIL
2997
        "ands   r6, r6, r5\n\t"
2998
#elif defined(__clang__)
2999
        "ands   r6, r5\n\t"
3000
#else
3001
        "and    r6, r5\n\t"
3002
#endif
3003
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3004
        "subs   %[hi], %[hi], r6\n\t"
3005
#else
3006
        "sub    %[hi], %[hi], r6\n\t"
3007
#endif
3008
        "movs   r4, #29\n\t"
3009
        "\n"
3010
    "L_sp_div_word_loop%=:\n\t"
3011
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3012
        "lsls   %[lo], %[lo], #1\n\t"
3013
#else
3014
        "lsl    %[lo], %[lo], #1\n\t"
3015
#endif
3016
#ifdef WOLFSSL_KEIL
3017
        "adcs   %[hi], %[hi], %[hi]\n\t"
3018
#elif defined(__clang__)
3019
        "adcs   %[hi], %[hi]\n\t"
3020
#else
3021
        "adc    %[hi], %[hi]\n\t"
3022
#endif
3023
        "movs   r6, r5\n\t"
3024
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3025
        "subs   r6, r6, %[hi]\n\t"
3026
#else
3027
        "sub    r6, r6, %[hi]\n\t"
3028
#endif
3029
#ifdef WOLFSSL_KEIL
3030
        "sbcs   r6, r6, r6\n\t"
3031
#elif defined(__clang__)
3032
        "sbcs   r6, r6\n\t"
3033
#else
3034
        "sbc    r6, r6\n\t"
3035
#endif
3036
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3037
        "adds   r3, r3, r3\n\t"
3038
#else
3039
        "add    r3, r3, r3\n\t"
3040
#endif
3041
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3042
        "subs   r3, r3, r6\n\t"
3043
#else
3044
        "sub    r3, r3, r6\n\t"
3045
#endif
3046
#ifdef WOLFSSL_KEIL
3047
        "ands   r6, r6, r5\n\t"
3048
#elif defined(__clang__)
3049
        "ands   r6, r5\n\t"
3050
#else
3051
        "and    r6, r5\n\t"
3052
#endif
3053
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3054
        "subs   %[hi], %[hi], r6\n\t"
3055
#else
3056
        "sub    %[hi], %[hi], r6\n\t"
3057
#endif
3058
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3059
        "subs   r4, r4, #1\n\t"
3060
#else
3061
        "sub    r4, r4, #1\n\t"
3062
#endif
3063
        "bpl    L_sp_div_word_loop%=\n\t"
3064
        "movs   r7, #0\n\t"
3065
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3066
        "adds   r3, r3, r3\n\t"
3067
#else
3068
        "add    r3, r3, r3\n\t"
3069
#endif
3070
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3071
        "adds   r3, r3, #1\n\t"
3072
#else
3073
        "add    r3, r3, #1\n\t"
3074
#endif
3075
        /* r * d - Start */
3076
        "uxth   %[hi], r3\n\t"
3077
        "uxth   r4, %[d]\n\t"
3078
#ifdef WOLFSSL_KEIL
3079
        "muls   r4, %[hi], r4\n\t"
3080
#elif defined(__clang__)
3081
        "muls   r4, %[hi]\n\t"
3082
#else
3083
        "mul    r4, %[hi]\n\t"
3084
#endif
3085
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3086
        "lsrs   r6, %[d], #16\n\t"
3087
#else
3088
        "lsr    r6, %[d], #16\n\t"
3089
#endif
3090
#ifdef WOLFSSL_KEIL
3091
        "muls   %[hi], r6, %[hi]\n\t"
3092
#elif defined(__clang__)
3093
        "muls   %[hi], r6\n\t"
3094
#else
3095
        "mul    %[hi], r6\n\t"
3096
#endif
3097
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3098
        "lsrs   r5, %[hi], #16\n\t"
3099
#else
3100
        "lsr    r5, %[hi], #16\n\t"
3101
#endif
3102
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3103
        "lsls   %[hi], %[hi], #16\n\t"
3104
#else
3105
        "lsl    %[hi], %[hi], #16\n\t"
3106
#endif
3107
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3108
        "adds   r4, r4, %[hi]\n\t"
3109
#else
3110
        "add    r4, r4, %[hi]\n\t"
3111
#endif
3112
#ifdef WOLFSSL_KEIL
3113
        "adcs   r5, r5, r7\n\t"
3114
#elif defined(__clang__)
3115
        "adcs   r5, r7\n\t"
3116
#else
3117
        "adc    r5, r7\n\t"
3118
#endif
3119
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3120
        "lsrs   %[hi], r3, #16\n\t"
3121
#else
3122
        "lsr    %[hi], r3, #16\n\t"
3123
#endif
3124
#ifdef WOLFSSL_KEIL
3125
        "muls   r6, %[hi], r6\n\t"
3126
#elif defined(__clang__)
3127
        "muls   r6, %[hi]\n\t"
3128
#else
3129
        "mul    r6, %[hi]\n\t"
3130
#endif
3131
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3132
        "adds   r5, r5, r6\n\t"
3133
#else
3134
        "add    r5, r5, r6\n\t"
3135
#endif
3136
        "uxth   r6, %[d]\n\t"
3137
#ifdef WOLFSSL_KEIL
3138
        "muls   %[hi], r6, %[hi]\n\t"
3139
#elif defined(__clang__)
3140
        "muls   %[hi], r6\n\t"
3141
#else
3142
        "mul    %[hi], r6\n\t"
3143
#endif
3144
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3145
        "lsrs   r6, %[hi], #16\n\t"
3146
#else
3147
        "lsr    r6, %[hi], #16\n\t"
3148
#endif
3149
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3150
        "lsls   %[hi], %[hi], #16\n\t"
3151
#else
3152
        "lsl    %[hi], %[hi], #16\n\t"
3153
#endif
3154
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3155
        "adds   r4, r4, %[hi]\n\t"
3156
#else
3157
        "add    r4, r4, %[hi]\n\t"
3158
#endif
3159
#ifdef WOLFSSL_KEIL
3160
        "adcs   r5, r5, r6\n\t"
3161
#elif defined(__clang__)
3162
        "adcs   r5, r6\n\t"
3163
#else
3164
        "adc    r5, r6\n\t"
3165
#endif
3166
        /* r * d - Done */
3167
        "mov    %[hi], r8\n\t"
3168
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3169
        "subs   %[hi], %[hi], r4\n\t"
3170
#else
3171
        "sub    %[hi], %[hi], r4\n\t"
3172
#endif
3173
        "movs   r4, %[hi]\n\t"
3174
        "mov    %[hi], r9\n\t"
3175
#ifdef WOLFSSL_KEIL
3176
        "sbcs   %[hi], %[hi], r5\n\t"
3177
#elif defined(__clang__)
3178
        "sbcs   %[hi], r5\n\t"
3179
#else
3180
        "sbc    %[hi], r5\n\t"
3181
#endif
3182
        "movs   r5, %[hi]\n\t"
3183
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3184
        "adds   r3, r3, r5\n\t"
3185
#else
3186
        "add    r3, r3, r5\n\t"
3187
#endif
3188
        /* r * d - Start */
3189
        "uxth   %[hi], r3\n\t"
3190
        "uxth   r4, %[d]\n\t"
3191
#ifdef WOLFSSL_KEIL
3192
        "muls   r4, %[hi], r4\n\t"
3193
#elif defined(__clang__)
3194
        "muls   r4, %[hi]\n\t"
3195
#else
3196
        "mul    r4, %[hi]\n\t"
3197
#endif
3198
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3199
        "lsrs   r6, %[d], #16\n\t"
3200
#else
3201
        "lsr    r6, %[d], #16\n\t"
3202
#endif
3203
#ifdef WOLFSSL_KEIL
3204
        "muls   %[hi], r6, %[hi]\n\t"
3205
#elif defined(__clang__)
3206
        "muls   %[hi], r6\n\t"
3207
#else
3208
        "mul    %[hi], r6\n\t"
3209
#endif
3210
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3211
        "lsrs   r5, %[hi], #16\n\t"
3212
#else
3213
        "lsr    r5, %[hi], #16\n\t"
3214
#endif
3215
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3216
        "lsls   %[hi], %[hi], #16\n\t"
3217
#else
3218
        "lsl    %[hi], %[hi], #16\n\t"
3219
#endif
3220
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3221
        "adds   r4, r4, %[hi]\n\t"
3222
#else
3223
        "add    r4, r4, %[hi]\n\t"
3224
#endif
3225
#ifdef WOLFSSL_KEIL
3226
        "adcs   r5, r5, r7\n\t"
3227
#elif defined(__clang__)
3228
        "adcs   r5, r7\n\t"
3229
#else
3230
        "adc    r5, r7\n\t"
3231
#endif
3232
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3233
        "lsrs   %[hi], r3, #16\n\t"
3234
#else
3235
        "lsr    %[hi], r3, #16\n\t"
3236
#endif
3237
#ifdef WOLFSSL_KEIL
3238
        "muls   r6, %[hi], r6\n\t"
3239
#elif defined(__clang__)
3240
        "muls   r6, %[hi]\n\t"
3241
#else
3242
        "mul    r6, %[hi]\n\t"
3243
#endif
3244
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3245
        "adds   r5, r5, r6\n\t"
3246
#else
3247
        "add    r5, r5, r6\n\t"
3248
#endif
3249
        "uxth   r6, %[d]\n\t"
3250
#ifdef WOLFSSL_KEIL
3251
        "muls   %[hi], r6, %[hi]\n\t"
3252
#elif defined(__clang__)
3253
        "muls   %[hi], r6\n\t"
3254
#else
3255
        "mul    %[hi], r6\n\t"
3256
#endif
3257
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3258
        "lsrs   r6, %[hi], #16\n\t"
3259
#else
3260
        "lsr    r6, %[hi], #16\n\t"
3261
#endif
3262
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3263
        "lsls   %[hi], %[hi], #16\n\t"
3264
#else
3265
        "lsl    %[hi], %[hi], #16\n\t"
3266
#endif
3267
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3268
        "adds   r4, r4, %[hi]\n\t"
3269
#else
3270
        "add    r4, r4, %[hi]\n\t"
3271
#endif
3272
#ifdef WOLFSSL_KEIL
3273
        "adcs   r5, r5, r6\n\t"
3274
#elif defined(__clang__)
3275
        "adcs   r5, r6\n\t"
3276
#else
3277
        "adc    r5, r6\n\t"
3278
#endif
3279
        /* r * d - Done */
3280
        "mov    %[hi], r8\n\t"
3281
        "mov    r6, r9\n\t"
3282
#ifdef WOLFSSL_KEIL
3283
        "subs   r4, %[hi], r4\n\t"
3284
#else
3285
#ifdef __clang__
3286
        "subs   r4, %[hi], r4\n\t"
3287
#else
3288
        "sub    r4, %[hi], r4\n\t"
3289
#endif
3290
#endif
3291
#ifdef WOLFSSL_KEIL
3292
        "sbcs   r6, r6, r5\n\t"
3293
#elif defined(__clang__)
3294
        "sbcs   r6, r5\n\t"
3295
#else
3296
        "sbc    r6, r5\n\t"
3297
#endif
3298
        "movs   r5, r6\n\t"
3299
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3300
        "adds   r3, r3, r5\n\t"
3301
#else
3302
        "add    r3, r3, r5\n\t"
3303
#endif
3304
        /* r * d - Start */
3305
        "uxth   %[hi], r3\n\t"
3306
        "uxth   r4, %[d]\n\t"
3307
#ifdef WOLFSSL_KEIL
3308
        "muls   r4, %[hi], r4\n\t"
3309
#elif defined(__clang__)
3310
        "muls   r4, %[hi]\n\t"
3311
#else
3312
        "mul    r4, %[hi]\n\t"
3313
#endif
3314
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3315
        "lsrs   r6, %[d], #16\n\t"
3316
#else
3317
        "lsr    r6, %[d], #16\n\t"
3318
#endif
3319
#ifdef WOLFSSL_KEIL
3320
        "muls   %[hi], r6, %[hi]\n\t"
3321
#elif defined(__clang__)
3322
        "muls   %[hi], r6\n\t"
3323
#else
3324
        "mul    %[hi], r6\n\t"
3325
#endif
3326
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3327
        "lsrs   r5, %[hi], #16\n\t"
3328
#else
3329
        "lsr    r5, %[hi], #16\n\t"
3330
#endif
3331
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3332
        "lsls   %[hi], %[hi], #16\n\t"
3333
#else
3334
        "lsl    %[hi], %[hi], #16\n\t"
3335
#endif
3336
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3337
        "adds   r4, r4, %[hi]\n\t"
3338
#else
3339
        "add    r4, r4, %[hi]\n\t"
3340
#endif
3341
#ifdef WOLFSSL_KEIL
3342
        "adcs   r5, r5, r7\n\t"
3343
#elif defined(__clang__)
3344
        "adcs   r5, r7\n\t"
3345
#else
3346
        "adc    r5, r7\n\t"
3347
#endif
3348
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3349
        "lsrs   %[hi], r3, #16\n\t"
3350
#else
3351
        "lsr    %[hi], r3, #16\n\t"
3352
#endif
3353
#ifdef WOLFSSL_KEIL
3354
        "muls   r6, %[hi], r6\n\t"
3355
#elif defined(__clang__)
3356
        "muls   r6, %[hi]\n\t"
3357
#else
3358
        "mul    r6, %[hi]\n\t"
3359
#endif
3360
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3361
        "adds   r5, r5, r6\n\t"
3362
#else
3363
        "add    r5, r5, r6\n\t"
3364
#endif
3365
        "uxth   r6, %[d]\n\t"
3366
#ifdef WOLFSSL_KEIL
3367
        "muls   %[hi], r6, %[hi]\n\t"
3368
#elif defined(__clang__)
3369
        "muls   %[hi], r6\n\t"
3370
#else
3371
        "mul    %[hi], r6\n\t"
3372
#endif
3373
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3374
        "lsrs   r6, %[hi], #16\n\t"
3375
#else
3376
        "lsr    r6, %[hi], #16\n\t"
3377
#endif
3378
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3379
        "lsls   %[hi], %[hi], #16\n\t"
3380
#else
3381
        "lsl    %[hi], %[hi], #16\n\t"
3382
#endif
3383
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3384
        "adds   r4, r4, %[hi]\n\t"
3385
#else
3386
        "add    r4, r4, %[hi]\n\t"
3387
#endif
3388
#ifdef WOLFSSL_KEIL
3389
        "adcs   r5, r5, r6\n\t"
3390
#elif defined(__clang__)
3391
        "adcs   r5, r6\n\t"
3392
#else
3393
        "adc    r5, r6\n\t"
3394
#endif
3395
        /* r * d - Done */
3396
        "mov    %[hi], r8\n\t"
3397
        "mov    r6, r9\n\t"
3398
#ifdef WOLFSSL_KEIL
3399
        "subs   r4, %[hi], r4\n\t"
3400
#else
3401
#ifdef __clang__
3402
        "subs   r4, %[hi], r4\n\t"
3403
#else
3404
        "sub    r4, %[hi], r4\n\t"
3405
#endif
3406
#endif
3407
#ifdef WOLFSSL_KEIL
3408
        "sbcs   r6, r6, r5\n\t"
3409
#elif defined(__clang__)
3410
        "sbcs   r6, r5\n\t"
3411
#else
3412
        "sbc    r6, r5\n\t"
3413
#endif
3414
        "movs   r5, r6\n\t"
3415
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3416
        "adds   r3, r3, r5\n\t"
3417
#else
3418
        "add    r3, r3, r5\n\t"
3419
#endif
3420
        "movs   r6, %[d]\n\t"
3421
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3422
        "subs   r6, r6, r4\n\t"
3423
#else
3424
        "sub    r6, r6, r4\n\t"
3425
#endif
3426
#ifdef WOLFSSL_KEIL
3427
        "sbcs   r6, r6, r6\n\t"
3428
#elif defined(__clang__)
3429
        "sbcs   r6, r6\n\t"
3430
#else
3431
        "sbc    r6, r6\n\t"
3432
#endif
3433
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3434
        "subs   r3, r3, r6\n\t"
3435
#else
3436
        "sub    r3, r3, r6\n\t"
3437
#endif
3438
        "movs   %[hi], r3\n\t"
3439
        : [hi] "+l" (hi), [lo] "+l" (lo), [d] "+l" (d)
3440
        :
3441
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
3442
    );
3443
    return (sp_uint32)(size_t)hi;
3444
}
3445
3446
/* Flag: an assembly word-divide routine is provided by this section
 * (only when WOLFSSL_SP_DIV_WORD_HALF is not defined).
 * NOTE(review): presumably tested later to skip a C fallback - confirm.
 */
#define SP_ASM_DIV_WORD
3447
#endif /* !WOLFSSL_SP_DIV_WORD_HALF */
3448
3449
/* Flag: assembly helpers have been defined for this CPU (ARM Thumb, 32-bit
 * words). NOTE(review): presumably consulted later in the file - confirm.
 */
#define SP_INT_ASM_AVAILABLE
3450
3451
    #endif /* WOLFSSL_SP_ARM_THUMB && SP_WORD_SIZE == 32 */
3452
3453
    #if defined(WOLFSSL_SP_PPC64) && SP_WORD_SIZE == 64
3454
/*
3455
 * CPU: PPC64
3456
 */
3457
3458
    #ifdef __APPLE__
3459
3460
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mulld produces the low 64 bits of the product and mulhdu the high 64 bits
 * of the unsigned product.
 * vl and vh are read-write ("+r") operands although they are only written
 * here - presumably so they never share a register with va or vb
 * (TODO confirm).
 */
3461
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3462
    __asm__ __volatile__ (                               \
3463
        "mulld  %[l], %[a], %[b]  \n\t"            \
3464
        "mulhdu %[h], %[a], %[b]  \n\t"            \
3465
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3466
        : [a] "r" (va), [b] "r" (vb)                     \
3467
        :                                                \
3468
    )
3469
/* Multiply va by vb and store the result in: vo | vh | vl
 *
 * vh | vl receive the double size product (mulhdu: high 64 bits, mulld: low
 * 64 bits). vo is an output-only operand and is set to zero with li.
 */
3470
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3471
    __asm__ __volatile__ (                               \
3472
        "mulhdu %[h], %[a], %[b]  \n\t"            \
3473
        "mulld  %[l], %[a], %[b]  \n\t"            \
3474
        "li %[o], 0     \n\t"            \
3475
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3476
        : [a] "r" (va), [b] "r" (vb)                     \
3477
        :                                                \
3478
    )
3479
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is formed in the scratch registers r16 (low 64 bits) and r17
 * (high 64 bits), both listed as clobbered. addc/adde add it into vl and vh
 * and addze adds the resulting carry into vo.
 */
3480
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3481
    __asm__ __volatile__ (                               \
3482
        "mulld  r16, %[a], %[b]   \n\t"            \
3483
        "mulhdu r17, %[a], %[b]   \n\t"            \
3484
        "addc %[l], %[l], r16   \n\t"            \
3485
        "adde %[h], %[h], r17   \n\t"            \
3486
        "addze  %[o], %[o]    \n\t"            \
3487
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3488
        : [a] "r" (va), [b] "r" (vb)                     \
3489
        : "r16", "r17", "cc"                             \
3490
    )
3491
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is formed in the scratch registers r16 (low 64 bits) and r17
 * (high 64 bits) and added with addc/adde. There is no third word: a carry
 * out of vh is not stored anywhere.
 */
3492
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3493
    __asm__ __volatile__ (                               \
3494
        "mulld  r16, %[a], %[b]   \n\t"            \
3495
        "mulhdu r17, %[a], %[b]   \n\t"            \
3496
        "addc %[l], %[l], r16   \n\t"            \
3497
        "adde %[h], %[h], r17   \n\t"            \
3498
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3499
        : [a] "r" (va), [b] "r" (vb)                     \
3500
        : "r16", "r17", "cc"                             \
3501
    )
3502
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into r16 (low 64 bits) and r17 (high 64
 * bits). The addc/adde/addze sequence is then issued twice, so the carry out
 * of vh is added into vo after each of the two additions.
 */
3503
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3504
    __asm__ __volatile__ (                               \
3505
        "mulld  r16, %[a], %[b]   \n\t"            \
3506
        "mulhdu r17, %[a], %[b]   \n\t"            \
3507
        "addc %[l], %[l], r16   \n\t"            \
3508
        "adde %[h], %[h], r17   \n\t"            \
3509
        "addze  %[o], %[o]    \n\t"            \
3510
        "addc %[l], %[l], r16   \n\t"            \
3511
        "adde %[h], %[h], r17   \n\t"            \
3512
        "addze  %[o], %[o]    \n\t"            \
3513
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3514
        : [a] "r" (va), [b] "r" (vb)                     \
3515
        : "r16", "r17", "cc"                             \
3516
    )
3517
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Only one addze is issued, after the second addc/adde pair, so only the
 * carry out of the second addition reaches vo. A carry out of the first
 * addition would be lost - hence the assumption above.
 */
3520
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
3521
    __asm__ __volatile__ (                               \
3522
        "mulld  r16, %[a], %[b]   \n\t"            \
3523
        "mulhdu r17, %[a], %[b]   \n\t"            \
3524
        "addc %[l], %[l], r16   \n\t"            \
3525
        "adde %[h], %[h], r17   \n\t"            \
3526
        "addc %[l], %[l], r16   \n\t"            \
3527
        "adde %[h], %[h], r17   \n\t"            \
3528
        "addze  %[o], %[o]    \n\t"            \
3529
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3530
        : [a] "r" (va), [b] "r" (vb)                     \
3531
        : "r16", "r17", "cc"                             \
3532
    )
3533
/* Square va and store double size result in: vh | vl
 *
 * mulld produces the low 64 bits of va * va and mulhdu the high 64 bits.
 * vl and vh are read-write ("+r") operands although they are only written
 * here - presumably so they never share a register with va (TODO confirm).
 */
3534
#define SP_ASM_SQR(vl, vh, va)                           \
3535
    __asm__ __volatile__ (                               \
3536
        "mulld  %[l], %[a], %[a]  \n\t"            \
3537
        "mulhdu %[h], %[a], %[a]  \n\t"            \
3538
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3539
        : [a] "r" (va)                                   \
3540
        :                                                \
3541
    )
3542
/* Square va and add double size result into: vo | vh | vl
 *
 * va * va is formed in the scratch registers r16 (low 64 bits) and r17 (high
 * 64 bits), both listed as clobbered. addc/adde add it into vl and vh and
 * addze adds the resulting carry into vo.
 */
3543
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
3544
    __asm__ __volatile__ (                               \
3545
        "mulld  r16, %[a], %[a]   \n\t"            \
3546
        "mulhdu r17, %[a], %[a]   \n\t"            \
3547
        "addc %[l], %[l], r16   \n\t"            \
3548
        "adde %[h], %[h], r17   \n\t"            \
3549
        "addze  %[o], %[o]    \n\t"            \
3550
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3551
        : [a] "r" (va)                                   \
3552
        : "r16", "r17", "cc"                             \
3553
    )
3554
/* Square va and add double size result into: vh | vl
 *
 * va * va is formed in the scratch registers r16 (low 64 bits) and r17 (high
 * 64 bits) and added with addc/adde. There is no third word: a carry out of
 * vh is not stored anywhere.
 */
3555
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
3556
    __asm__ __volatile__ (                               \
3557
        "mulld  r16, %[a], %[a]   \n\t"            \
3558
        "mulhdu r17, %[a], %[a]   \n\t"            \
3559
        "addc %[l], %[l], r16   \n\t"            \
3560
        "adde %[h], %[h], r17   \n\t"            \
3561
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3562
        : [a] "r" (va)                                   \
3563
        : "r16", "r17", "cc"                             \
3564
    )
3565
/* Add va into: vh | vl
 *
 * addc adds va to vl and records the carry; addze then adds that carry into
 * vh. A carry out of vh is not stored anywhere.
 */
3566
#define SP_ASM_ADDC(vl, vh, va)                          \
3567
    __asm__ __volatile__ (                               \
3568
        "addc %[l], %[l], %[a]  \n\t"            \
3569
        "addze  %[h], %[h]    \n\t"            \
3570
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3571
        : [a] "r" (va)                                   \
3572
        : "cc"                                           \
3573
    )
3574
/* Sub va from: vh | vl
 *
 * subfc computes vl - va and records the borrow in the carry bit. r16 is
 * loaded with zero so that subfe (~r16 + vh + carry) subtracts the borrow,
 * if any, from vh. r16 is listed as clobbered.
 */
3575
#define SP_ASM_SUBB(vl, vh, va)                          \
3576
    __asm__ __volatile__ (                               \
3577
        "subfc  %[l], %[a], %[l]  \n\t"            \
3578
        "li    r16, 0     \n\t"            \
3579
        "subfe %[h], r16, %[h]    \n\t"            \
3580
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3581
        : [a] "r" (va)                                   \
3582
        : "r16", "cc"                                    \
3583
    )
3584
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word addc/adde/adde chain is issued twice with the same inputs.
 * A carry out of vo is not stored anywhere.
 */
3585
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
3586
    __asm__ __volatile__ (                               \
3587
        "addc %[l], %[l], %[a]  \n\t"            \
3588
        "adde %[h], %[h], %[b]  \n\t"            \
3589
        "adde %[o], %[o], %[c]  \n\t"            \
3590
        "addc %[l], %[l], %[a]  \n\t"            \
3591
        "adde %[h], %[h], %[b]  \n\t"            \
3592
        "adde %[o], %[o], %[c]  \n\t"            \
3593
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3594
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
3595
        : "cc"                                           \
3596
    )
3597
/* Count leading zeros.
 *
 * cntlzd stores the number of leading zero bits in the 64-bit value va into
 * vn. Note the argument order: input first, output second.
 */
3598
#define SP_ASM_LZCNT(va, vn)                             \
3599
    __asm__ __volatile__ (                               \
3600
        "cntlzd %[n], %[a]  \n\t"                    \
3601
        : [n] "=r" (vn)                                  \
3602
        : [a] "r" (va)                                   \
3603
        :                                                \
3604
    )
3605
3606
    #else  /* !defined(__APPLE__) */
3607
3608
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mulld produces the low 64 bits of the product and mulhdu the high 64 bits
 * of the unsigned product.
 * vl and vh are read-write ("+r") operands although they are only written
 * here - presumably so they never share a register with va or vb
 * (TODO confirm).
 */
3609
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3610
    __asm__ __volatile__ (                               \
3611
        "mulld  %[l], %[a], %[b]  \n\t"            \
3612
        "mulhdu %[h], %[a], %[b]  \n\t"            \
3613
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3614
        : [a] "r" (va), [b] "r" (vb)                     \
3615
        :                                                \
3616
    )
3617
/* Multiply va by vb and store the result in: vo | vh | vl
 *
 * vh | vl receive the double size product (mulhdu: high 64 bits, mulld: low
 * 64 bits). vo is an output-only operand and is set to zero with li.
 */
3618
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3619
    __asm__ __volatile__ (                               \
3620
        "mulhdu %[h], %[a], %[b]  \n\t"            \
3621
        "mulld  %[l], %[a], %[b]  \n\t"            \
3622
        "li %[o], 0     \n\t"            \
3623
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3624
        : [a] "r" (va), [b] "r" (vb)                     \
3625
        :                                                \
3626
    )
3627
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is formed in the scratch registers 16 (low 64 bits) and 17
 * (high 64 bits), written here as bare register numbers and both listed as
 * clobbered. addc/adde add it into vl and vh and addze adds the resulting
 * carry into vo.
 */
3628
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3629
    __asm__ __volatile__ (                               \
3630
        "mulld  16, %[a], %[b]    \n\t"            \
3631
        "mulhdu 17, %[a], %[b]    \n\t"            \
3632
        "addc %[l], %[l], 16    \n\t"            \
3633
        "adde %[h], %[h], 17    \n\t"            \
3634
        "addze  %[o], %[o]    \n\t"            \
3635
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3636
        : [a] "r" (va), [b] "r" (vb)                     \
3637
        : "16", "17", "cc"                               \
3638
    )
3639
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is formed in the scratch registers 16 (low 64 bits) and 17
 * (high 64 bits) and added with addc/adde. There is no third word: a carry
 * out of vh is not stored anywhere.
 */
3640
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3641
    __asm__ __volatile__ (                               \
3642
        "mulld  16, %[a], %[b]    \n\t"            \
3643
        "mulhdu 17, %[a], %[b]    \n\t"            \
3644
        "addc %[l], %[l], 16    \n\t"            \
3645
        "adde %[h], %[h], 17    \n\t"            \
3646
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3647
        : [a] "r" (va), [b] "r" (vb)                     \
3648
        : "16", "17", "cc"                               \
3649
    )
3650
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into registers 16 (low 64 bits) and 17 (high
 * 64 bits). The addc/adde/addze sequence is then issued twice, so the carry
 * out of vh is added into vo after each of the two additions.
 */
3651
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3652
    __asm__ __volatile__ (                               \
3653
        "mulld  16, %[a], %[b]    \n\t"            \
3654
        "mulhdu 17, %[a], %[b]    \n\t"            \
3655
        "addc %[l], %[l], 16    \n\t"            \
3656
        "adde %[h], %[h], 17    \n\t"            \
3657
        "addze  %[o], %[o]    \n\t"            \
3658
        "addc %[l], %[l], 16    \n\t"            \
3659
        "adde %[h], %[h], 17    \n\t"            \
3660
        "addze  %[o], %[o]    \n\t"            \
3661
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3662
        : [a] "r" (va), [b] "r" (vb)                     \
3663
        : "16", "17", "cc"                               \
3664
    )
3665
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Only one addze is issued, after the second addc/adde pair, so only the
 * carry out of the second addition reaches vo. A carry out of the first
 * addition would be lost - hence the assumption above.
 */
3668
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
3669
    __asm__ __volatile__ (                               \
3670
        "mulld  16, %[a], %[b]    \n\t"            \
3671
        "mulhdu 17, %[a], %[b]    \n\t"            \
3672
        "addc %[l], %[l], 16    \n\t"            \
3673
        "adde %[h], %[h], 17    \n\t"            \
3674
        "addc %[l], %[l], 16    \n\t"            \
3675
        "adde %[h], %[h], 17    \n\t"            \
3676
        "addze  %[o], %[o]    \n\t"            \
3677
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3678
        : [a] "r" (va), [b] "r" (vb)                     \
3679
        : "16", "17", "cc"                               \
3680
    )
3681
/* Square va and store double size result in: vh | vl
 *
 * mulld produces the low 64 bits of va * va and mulhdu the high 64 bits.
 * vl and vh are read-write ("+r") operands although they are only written
 * here - presumably so they never share a register with va (TODO confirm).
 */
3682
#define SP_ASM_SQR(vl, vh, va)                           \
3683
    __asm__ __volatile__ (                               \
3684
        "mulld  %[l], %[a], %[a]  \n\t"            \
3685
        "mulhdu %[h], %[a], %[a]  \n\t"            \
3686
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3687
        : [a] "r" (va)                                   \
3688
        :                                                \
3689
    )
3690
/* Square va and add double size result into: vo | vh | vl
 *
 * va * va is formed in the scratch registers 16 (low 64 bits) and 17 (high
 * 64 bits), both listed as clobbered. addc/adde add it into vl and vh and
 * addze adds the resulting carry into vo.
 */
3691
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
3692
    __asm__ __volatile__ (                               \
3693
        "mulld  16, %[a], %[a]    \n\t"            \
3694
        "mulhdu 17, %[a], %[a]    \n\t"            \
3695
        "addc %[l], %[l], 16    \n\t"            \
3696
        "adde %[h], %[h], 17    \n\t"            \
3697
        "addze  %[o], %[o]    \n\t"            \
3698
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3699
        : [a] "r" (va)                                   \
3700
        : "16", "17", "cc"                               \
3701
    )
3702
/* Square va and add double size result into: vh | vl
 *
 * va * va is formed in the scratch registers 16 (low 64 bits) and 17 (high
 * 64 bits) and added with addc/adde. There is no third word: a carry out of
 * vh is not stored anywhere.
 */
3703
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
3704
    __asm__ __volatile__ (                               \
3705
        "mulld  16, %[a], %[a]    \n\t"            \
3706
        "mulhdu 17, %[a], %[a]    \n\t"            \
3707
        "addc %[l], %[l], 16    \n\t"            \
3708
        "adde %[h], %[h], 17    \n\t"            \
3709
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3710
        : [a] "r" (va)                                   \
3711
        : "16", "17", "cc"                               \
3712
    )
3713
/* Add va into: vh | vl
 *
 * addc adds va to vl and records the carry; addze then adds that carry into
 * vh. A carry out of vh is not stored anywhere.
 */
3714
#define SP_ASM_ADDC(vl, vh, va)                          \
3715
    __asm__ __volatile__ (                               \
3716
        "addc %[l], %[l], %[a]  \n\t"            \
3717
        "addze  %[h], %[h]    \n\t"            \
3718
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3719
        : [a] "r" (va)                                   \
3720
        : "cc"                                           \
3721
    )
3722
/* Sub va from: vh | vl
 *
 * subfc computes vl - va and records the borrow in the carry bit. Register
 * 16 is loaded with zero so that subfe (~r16 + vh + carry) subtracts the
 * borrow, if any, from vh. Register 16 is listed as clobbered.
 */
3723
#define SP_ASM_SUBB(vl, vh, va)                          \
3724
    __asm__ __volatile__ (                               \
3725
        "subfc  %[l], %[a], %[l]  \n\t"            \
3726
        "li    16, 0      \n\t"            \
3727
        "subfe %[h], 16, %[h]   \n\t"            \
3728
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3729
        : [a] "r" (va)                                   \
3730
        : "16", "cc"                                     \
3731
    )
3732
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word addc/adde/adde chain is issued twice with the same inputs.
 * A carry out of vo is not stored anywhere.
 */
3733
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
3734
    __asm__ __volatile__ (                               \
3735
        "addc %[l], %[l], %[a]  \n\t"            \
3736
        "adde %[h], %[h], %[b]  \n\t"            \
3737
        "adde %[o], %[o], %[c]  \n\t"            \
3738
        "addc %[l], %[l], %[a]  \n\t"            \
3739
        "adde %[h], %[h], %[b]  \n\t"            \
3740
        "adde %[o], %[o], %[c]  \n\t"            \
3741
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3742
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
3743
        : "cc"                                           \
3744
    )
3745
/* Count leading zeros.
 *
 * cntlzd stores the number of leading zero bits in the 64-bit value va into
 * vn. Note the argument order: input first, output second.
 */
3746
#define SP_ASM_LZCNT(va, vn)                             \
3747
    __asm__ __volatile__ (                               \
3748
        "cntlzd %[n], %[a]  \n\t"                    \
3749
        : [n] "=r" (vn)                                  \
3750
        : [a] "r" (va)                                   \
3751
        :                                                \
3752
    )
3753
3754
    #endif /* !defined(__APPLE__) */
3755
3756
/* Flag: the SP_ASM_* helper macros have been defined for this CPU (PPC64,
 * 64-bit words). NOTE(review): presumably consulted later in the file -
 * confirm.
 */
#define SP_INT_ASM_AVAILABLE
3757
3758
    #endif /* WOLFSSL_SP_PPC64 && SP_WORD_SIZE == 64 */
3759
3760
    #if defined(WOLFSSL_SP_PPC) && SP_WORD_SIZE == 32
3761
/*
3762
 * CPU: PPC 32-bit
3763
 */
3764
3765
    #ifdef __APPLE__
3766
3767
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mullw produces the low 32 bits of the product and mulhwu the high 32 bits
 * of the unsigned product.
 * vl and vh are read-write ("+r") operands although they are only written
 * here - presumably so they never share a register with va or vb
 * (TODO confirm).
 */
3767
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3768
    __asm__ __volatile__ (                               \
3769
        "mullw  %[l], %[a], %[b]  \n\t"            \
3770
        "mulhwu %[h], %[a], %[b]  \n\t"            \
3771
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3772
        : [a] "r" (va), [b] "r" (vb)                     \
3773
        :                                                \
3774
    )
3776
/* Multiply va by vb and store the result in: vo | vh | vl
 *
 * vh | vl receive the double size product (mulhwu: high 32 bits, mullw: low
 * 32 bits). vo is an output-only operand and is set to zero with li.
 * No clobber list is given.
 */
3777
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3778
    __asm__ __volatile__ (                               \
3779
        "mulhwu %[h], %[a], %[b]  \n\t"            \
3780
        "mullw  %[l], %[a], %[b]  \n\t"            \
3781
        "li %[o], 0     \n\t"            \
3782
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3783
        : [a] "r" (va), [b] "r" (vb)                     \
3784
    )
3785
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is formed in the scratch registers r16 (low 32 bits) and r17
 * (high 32 bits), both listed as clobbered. addc/adde add it into vl and vh
 * and addze adds the resulting carry into vo.
 */
3786
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3787
    __asm__ __volatile__ (                               \
3788
        "mullw  r16, %[a], %[b]   \n\t"            \
3789
        "mulhwu r17, %[a], %[b]   \n\t"            \
3790
        "addc %[l], %[l], r16   \n\t"            \
3791
        "adde %[h], %[h], r17   \n\t"            \
3792
        "addze  %[o], %[o]    \n\t"            \
3793
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3794
        : [a] "r" (va), [b] "r" (vb)                     \
3795
        : "r16", "r17", "cc"                             \
3796
    )
3797
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is formed in the scratch registers r16 (low 32 bits) and r17
 * (high 32 bits) and added with addc/adde. There is no third word: a carry
 * out of vh is not stored anywhere.
 */
3798
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3799
    __asm__ __volatile__ (                               \
3800
        "mullw  r16, %[a], %[b]   \n\t"            \
3801
        "mulhwu r17, %[a], %[b]   \n\t"            \
3802
        "addc %[l], %[l], r16   \n\t"            \
3803
        "adde %[h], %[h], r17   \n\t"            \
3804
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3805
        : [a] "r" (va), [b] "r" (vb)                     \
3806
        : "r16", "r17", "cc"                             \
3807
    )
3808
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into r16 (low 32 bits) and r17 (high 32
 * bits). The addc/adde/addze sequence is then issued twice, so the carry out
 * of vh is added into vo after each of the two additions.
 */
3809
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3810
    __asm__ __volatile__ (                               \
3811
        "mullw  r16, %[a], %[b]   \n\t"            \
3812
        "mulhwu r17, %[a], %[b]   \n\t"            \
3813
        "addc %[l], %[l], r16   \n\t"            \
3814
        "adde %[h], %[h], r17   \n\t"            \
3815
        "addze  %[o], %[o]    \n\t"            \
3816
        "addc %[l], %[l], r16   \n\t"            \
3817
        "adde %[h], %[h], r17   \n\t"            \
3818
        "addze  %[o], %[o]    \n\t"            \
3819
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3820
        : [a] "r" (va), [b] "r" (vb)                     \
3821
        : "r16", "r17", "cc"                             \
3822
    )
3823
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Only one addze is issued, after the second addc/adde pair, so only the
 * carry out of the second addition reaches vo. A carry out of the first
 * addition would be lost - hence the assumption above.
 */
3826
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
3827
    __asm__ __volatile__ (                               \
3828
        "mullw  r16, %[a], %[b]   \n\t"            \
3829
        "mulhwu r17, %[a], %[b]   \n\t"            \
3830
        "addc %[l], %[l], r16   \n\t"            \
3831
        "adde %[h], %[h], r17   \n\t"            \
3832
        "addc %[l], %[l], r16   \n\t"            \
3833
        "adde %[h], %[h], r17   \n\t"            \
3834
        "addze  %[o], %[o]    \n\t"            \
3835
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3836
        : [a] "r" (va), [b] "r" (vb)                     \
3837
        : "r16", "r17", "cc"                             \
3838
    )
3839
/* Square va and store double size result in: vh | vl
 *
 * mullw produces the low 32 bits of va * va and mulhwu the high 32 bits.
 * vl and vh are read-write ("+r") operands although they are only written
 * here - presumably so they never share a register with va (TODO confirm).
 */
3840
#define SP_ASM_SQR(vl, vh, va)                           \
3841
    __asm__ __volatile__ (                               \
3842
        "mullw  %[l], %[a], %[a]  \n\t"            \
3843
        "mulhwu %[h], %[a], %[a]  \n\t"            \
3844
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3845
        : [a] "r" (va)                                   \
3846
        :                                                \
3847
    )
3848
/* Square va and add double size result into: vo | vh | vl
 *
 * va * va is formed in the scratch registers r16 (low 32 bits) and r17 (high
 * 32 bits), both listed as clobbered. addc/adde add it into vl and vh and
 * addze adds the resulting carry into vo.
 */
3849
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
3850
    __asm__ __volatile__ (                               \
3851
        "mullw  r16, %[a], %[a]   \n\t"            \
3852
        "mulhwu r17, %[a], %[a]   \n\t"            \
3853
        "addc %[l], %[l], r16   \n\t"            \
3854
        "adde %[h], %[h], r17   \n\t"            \
3855
        "addze  %[o], %[o]    \n\t"            \
3856
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3857
        : [a] "r" (va)                                   \
3858
        : "r16", "r17", "cc"                             \
3859
    )
3860
/* Square va and add double size result into: vh | vl
 *
 * va * va is formed in the scratch registers r16 (low 32 bits) and r17 (high
 * 32 bits) and added with addc/adde. There is no third word: a carry out of
 * vh is not stored anywhere.
 */
3861
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
3862
    __asm__ __volatile__ (                               \
3863
        "mullw  r16, %[a], %[a]   \n\t"            \
3864
        "mulhwu r17, %[a], %[a]   \n\t"            \
3865
        "addc %[l], %[l], r16   \n\t"            \
3866
        "adde %[h], %[h], r17   \n\t"            \
3867
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3868
        : [a] "r" (va)                                   \
3869
        : "r16", "r17", "cc"                             \
3870
    )
3871
/* Add va into: vh | vl
 *
 * addc adds va to vl and records the carry; addze then adds that carry into
 * vh. A carry out of vh is not stored anywhere.
 */
3872
#define SP_ASM_ADDC(vl, vh, va)                          \
3873
    __asm__ __volatile__ (                               \
3874
        "addc %[l], %[l], %[a]  \n\t"            \
3875
        "addze  %[h], %[h]    \n\t"            \
3876
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3877
        : [a] "r" (va)                                   \
3878
        : "cc"                                           \
3879
    )
3880
/* Sub va from: vh | vl
 *
 * subfc computes vl - va and records the borrow in the carry bit. r16 is
 * loaded with zero so that subfe (~r16 + vh + carry) subtracts the borrow,
 * if any, from vh. r16 is listed as clobbered.
 */
3881
#define SP_ASM_SUBB(vl, vh, va)                          \
3882
    __asm__ __volatile__ (                               \
3883
        "subfc  %[l], %[a], %[l]  \n\t"            \
3884
        "li r16, 0      \n\t"            \
3885
        "subfe  %[h], r16, %[h]   \n\t"            \
3886
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3887
        : [a] "r" (va)                                   \
3888
        : "r16", "cc"                                    \
3889
    )
3890
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word addc/adde/adde chain is issued twice with the same inputs.
 * A carry out of vo is not stored anywhere.
 */
3891
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
3892
    __asm__ __volatile__ (                               \
3893
        "addc %[l], %[l], %[a]  \n\t"            \
3894
        "adde %[h], %[h], %[b]  \n\t"            \
3895
        "adde %[o], %[o], %[c]  \n\t"            \
3896
        "addc %[l], %[l], %[a]  \n\t"            \
3897
        "adde %[h], %[h], %[b]  \n\t"            \
3898
        "adde %[o], %[o], %[c]  \n\t"            \
3899
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3900
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
3901
        : "cc"                                           \
3902
    )
3903
/* Count leading zeros.
 *
 * cntlzw stores the number of leading zero bits in the 32-bit value va into
 * vn. Note the argument order: input first, output second.
 * No clobber list is given.
 */
3904
#define SP_ASM_LZCNT(va, vn)                             \
3905
    __asm__ __volatile__ (                               \
3906
        "cntlzw %[n], %[a]  \n\t"                    \
3907
        : [n] "=r" (vn)                                  \
3908
        : [a] "r" (va)                                   \
3909
    )
3910
3911
    #else /* !defined(__APPLE__) */
3912
3913
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mullw produces the low word of the product and mulhwu the high word of
 * the unsigned product.
 *
 * NOTE(review): vl and vh are only written by the instructions but are
 * declared "+r" (read-write), so the compiler treats their previous values
 * as inputs.
 *
 * @param [out] vl  Low word of the product.
 * @param [out] vh  High word of the product.
 * @param [in]  va  First multiplicand.
 * @param [in]  vb  Second multiplicand.
 */
3914
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3915
    __asm__ __volatile__ (                               \
3916
        "mullw  %[l], %[a], %[b]  \n\t"            \
3917
        "mulhwu %[h], %[a], %[b]  \n\t"            \
3918
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3919
        : [a] "r" (va), [b] "r" (vb)                     \
3920
        :                                                \
3921
    )
3922
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The two-word product goes to vh | vl and vo is set to zero by xor-ing its
 * register with itself, so the three words together hold the product.
 *
 * @param [out] vl  Low word of the product.
 * @param [out] vh  High word of the product.
 * @param [out] vo  Overflow word; always zero on exit.
 * @param [in]  va  First multiplicand.
 * @param [in]  vb  Second multiplicand.
 */
3923
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3924
    __asm__ __volatile__ (                               \
3925
        "mulhwu %[h], %[a], %[b]  \n\t"            \
3926
        "mullw  %[l], %[a], %[b]  \n\t"            \
3927
        "xor  %[o], %[o], %[o]  \n\t"            \
3928
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3929
        : [a] "r" (va), [b] "r" (vb)                     \
3930
    )
3931
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is formed in fixed scratch registers 16 (low word) and 17
 * (high word), both declared clobbered. addc/adde add it into vh | vl and
 * addze carries any overflow into vo.
 *
 * @param [in,out] vl  Low word of the three-word accumulator.
 * @param [in,out] vh  Middle word of the three-word accumulator.
 * @param [in,out] vo  High word of the three-word accumulator.
 * @param [in]     va  First multiplicand.
 * @param [in]     vb  Second multiplicand.
 */
3932
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3933
    __asm__ __volatile__ (                               \
3934
        "mullw  16, %[a], %[b]    \n\t"            \
3935
        "mulhwu 17, %[a], %[b]    \n\t"            \
3936
        "addc %[l], %[l], 16    \n\t"            \
3937
        "adde %[h], %[h], 17    \n\t"            \
3938
        "addze  %[o], %[o]    \n\t"            \
3939
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3940
        : [a] "r" (va), [b] "r" (vb)                     \
3941
        : "16", "17", "cc"                               \
3942
    )
3943
/* Multiply va by vb and add double size result into: vh | vl
 *
 * Two-word variant: the product is formed in scratch registers 16 and 17
 * and added with addc/adde. There is no third word, so a carry out of vh is
 * not recorded.
 *
 * @param [in,out] vl  Low word of the two-word accumulator.
 * @param [in,out] vh  High word of the two-word accumulator.
 * @param [in]     va  First multiplicand.
 * @param [in]     vb  Second multiplicand.
 */
3944
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3945
    __asm__ __volatile__ (                               \
3946
        "mullw  16, %[a], %[b]    \n\t"            \
3947
        "mulhwu 17, %[a], %[b]    \n\t"            \
3948
        "addc %[l], %[l], 16    \n\t"            \
3949
        "adde %[h], %[h], 17    \n\t"            \
3950
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3951
        : [a] "r" (va), [b] "r" (vb)                     \
3952
        : "16", "17", "cc"                               \
3953
    )
3954
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into scratch registers 16 and 17 and the
 * addc/adde/addze carry chain is then issued twice, so the accumulator
 * grows by two times the product.
 *
 * @param [in,out] vl  Low word of the three-word accumulator.
 * @param [in,out] vh  Middle word of the three-word accumulator.
 * @param [in,out] vo  High word of the three-word accumulator.
 * @param [in]     va  First multiplicand.
 * @param [in]     vb  Second multiplicand.
 */
3955
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3956
    __asm__ __volatile__ (                               \
3957
        "mullw  16, %[a], %[b]    \n\t"            \
3958
        "mulhwu 17, %[a], %[b]    \n\t"            \
3959
        "addc %[l], %[l], 16    \n\t"            \
3960
        "adde %[h], %[h], 17    \n\t"            \
3961
        "addze  %[o], %[o]    \n\t"            \
3962
        "addc %[l], %[l], 16    \n\t"            \
3963
        "adde %[h], %[h], 17    \n\t"            \
3964
        "addze  %[o], %[o]    \n\t"            \
3965
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3966
        : [a] "r" (va), [b] "r" (vb)                     \
3967
        : "16", "17", "cc"                               \
3968
    )
3969
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once into scratch registers 16 and 17. The first
 * addc/adde pair has no following addze, so a carry out of vh from the
 * first add would be lost; only the second add carries into vo.
 *
 * @param [in,out] vl  Low word of the three-word accumulator.
 * @param [in,out] vh  Middle word of the three-word accumulator.
 * @param [in,out] vo  High word of the three-word accumulator.
 * @param [in]     va  First multiplicand.
 * @param [in]     vb  Second multiplicand.
 */
3972
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
3973
    __asm__ __volatile__ (                               \
3974
        "mullw  16, %[a], %[b]    \n\t"            \
3975
        "mulhwu 17, %[a], %[b]    \n\t"            \
3976
        "addc %[l], %[l], 16    \n\t"            \
3977
        "adde %[h], %[h], 17    \n\t"            \
3978
        "addc %[l], %[l], 16    \n\t"            \
3979
        "adde %[h], %[h], 17    \n\t"            \
3980
        "addze  %[o], %[o]    \n\t"            \
3981
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3982
        : [a] "r" (va), [b] "r" (vb)                     \
3983
        : "16", "17", "cc"                               \
3984
    )
3985
/* Square va and store double size result in: vh | vl
 *
 * Same instruction pair as the multiply (mullw for the low word, mulhwu for
 * the high word) with va used as both operands.
 *
 * NOTE(review): vl and vh are only written by the instructions but are
 * declared "+r" (read-write), so the compiler treats their previous values
 * as inputs.
 *
 * @param [out] vl  Low word of the square.
 * @param [out] vh  High word of the square.
 * @param [in]  va  Word to square.
 */
3986
#define SP_ASM_SQR(vl, vh, va)                           \
3987
    __asm__ __volatile__ (                               \
3988
        "mullw  %[l], %[a], %[a]  \n\t"            \
3989
        "mulhwu %[h], %[a], %[a]  \n\t"            \
3990
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3991
        : [a] "r" (va)                                   \
3992
        :                                                \
3993
    )
3994
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is formed in scratch registers 16 (low word) and 17 (high
 * word), both declared clobbered, then added with addc/adde; addze carries
 * any overflow into vo.
 *
 * @param [in,out] vl  Low word of the three-word accumulator.
 * @param [in,out] vh  Middle word of the three-word accumulator.
 * @param [in,out] vo  High word of the three-word accumulator.
 * @param [in]     va  Word to square.
 */
3995
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
3996
    __asm__ __volatile__ (                               \
3997
        "mullw  16, %[a], %[a]    \n\t"            \
3998
        "mulhwu 17, %[a], %[a]    \n\t"            \
3999
        "addc %[l], %[l], 16    \n\t"            \
4000
        "adde %[h], %[h], 17    \n\t"            \
4001
        "addze  %[o], %[o]    \n\t"            \
4002
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4003
        : [a] "r" (va)                                   \
4004
        : "16", "17", "cc"                               \
4005
    )
4006
/* Square va and add double size result into: vh | vl
 *
 * Two-word variant: the square is formed in scratch registers 16 and 17 and
 * added with addc/adde. A carry out of vh is not recorded.
 *
 * @param [in,out] vl  Low word of the two-word accumulator.
 * @param [in,out] vh  High word of the two-word accumulator.
 * @param [in]     va  Word to square.
 */
4007
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4008
    __asm__ __volatile__ (                               \
4009
        "mullw  16, %[a], %[a]    \n\t"            \
4010
        "mulhwu 17, %[a], %[a]    \n\t"            \
4011
        "addc %[l], %[l], 16    \n\t"            \
4012
        "adde %[h], %[h], 17    \n\t"            \
4013
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4014
        : [a] "r" (va)                                   \
4015
        : "16", "17", "cc"                               \
4016
    )
4017
/* Add va into: vh | vl
 *
 * addc adds va to vl and records the carry out; addze then adds only that
 * carry to vh. The condition register is declared clobbered.
 *
 * @param [in,out] vl  Low word of the two-word accumulator.
 * @param [in,out] vh  High word of the two-word accumulator.
 * @param [in]     va  Word to add.
 */
4018
#define SP_ASM_ADDC(vl, vh, va)                          \
4019
    __asm__ __volatile__ (                               \
4020
        "addc %[l], %[l], %[a]  \n\t"            \
4021
        "addze  %[h], %[h]    \n\t"            \
4022
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4023
        : [a] "r" (va)                                   \
4024
        : "cc"                                           \
4025
    )
4026
/* Sub va from: vh | vl
 *
 * subfc computes vl - va and records the carry (set when no borrow occurs).
 * Register 16 is zeroed by xor-ing it with itself so that subfe subtracts
 * zero from vh using that carry, which takes the borrow, if any, out of vh.
 * Register 16 is a fixed scratch register and is declared clobbered.
 *
 * @param [in,out] vl  Low word of the two-word value.
 * @param [in,out] vh  High word of the two-word value.
 * @param [in]     va  Word to subtract.
 */
4027
#define SP_ASM_SUBB(vl, vh, va)                          \
4028
    __asm__ __volatile__ (                               \
4029
        "subfc  %[l], %[a], %[l]  \n\t"            \
4030
        "xor  16, 16, 16    \n\t"            \
4031
        "subfe  %[h], 16, %[h]    \n\t"            \
4032
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4033
        : [a] "r" (va)                                   \
4034
        : "16", "cc"                                     \
4035
    )
4036
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word carry chain (addc, adde, adde) is issued twice with the
 * same inputs instead of doubling the inputs first.
 *
 * @param [in,out] vl  Low word of the three-word accumulator.
 * @param [in,out] vh  Middle word of the three-word accumulator.
 * @param [in,out] vo  High word of the three-word accumulator.
 * @param [in]     va  Low word of the value to add twice.
 * @param [in]     vb  Middle word of the value to add twice.
 * @param [in]     vc  High word of the value to add twice.
 */
4037
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4038
    __asm__ __volatile__ (                               \
4039
        "addc %[l], %[l], %[a]  \n\t"            \
4040
        "adde %[h], %[h], %[b]  \n\t"            \
4041
        "adde %[o], %[o], %[c]  \n\t"            \
4042
        "addc %[l], %[l], %[a]  \n\t"            \
4043
        "adde %[h], %[h], %[b]  \n\t"            \
4044
        "adde %[o], %[o], %[c]  \n\t"            \
4045
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4046
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4047
        : "cc"                                           \
4048
    )
4049
/* Count leading zeros.
 *
 * Issues a single cntlzw (count leading zeros, word) instruction.
 *
 * @param [in]  va  Word to examine.
 * @param [out] vn  Receives the count produced by cntlzw.
 */
4050
#define SP_ASM_LZCNT(va, vn)                             \
4051
    __asm__ __volatile__ (                               \
4052
        "cntlzw %[n], %[a]  \n\t"                    \
4053
        : [n] "=r" (vn)                                  \
4054
        : [a] "r" (va)                                   \
4055
    )
4056
4057
    #endif /* !defined(__APPLE__) */
4058
4059
#define SP_INT_ASM_AVAILABLE
4060
4061
    #endif /* WOLFSSL_SP_PPC && SP_WORD_SIZE == 32 */
4062
4063
    #if defined(WOLFSSL_SP_MIPS64) && SP_WORD_SIZE == 64
4064
/*
4065
 * CPU: MIPS 64-bit
4066
 */
4067
4068
/* Multiply va by vb and store double size result in: vh | vl
 *
 * dmultu leaves the unsigned product in the LO/HI register pair; mflo and
 * mfhi copy the two halves out. LO and HI are declared clobbered.
 *
 * NOTE(review): vl and vh are only written by the instructions but are
 * declared "+r" (read-write), so the compiler treats their previous values
 * as inputs.
 *
 * @param [out] vl  Low word of the product.
 * @param [out] vh  High word of the product.
 * @param [in]  va  First multiplicand.
 * @param [in]  vb  Second multiplicand.
 */
4069
#define SP_ASM_MUL(vl, vh, va, vb)                       \
4070
    __asm__ __volatile__ (                               \
4071
        "dmultu %[a], %[b]    \n\t"            \
4072
        "mflo %[l]      \n\t"            \
4073
        "mfhi %[h]      \n\t"            \
4074
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4075
        : [a] "r" (va), [b] "r" (vb)                     \
4076
        : "$lo", "$hi"                                   \
4077
    )
4078
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The two-word product is read from LO/HI into vl and vh, and vo is set to
 * zero by copying register $0, so the three words together hold the
 * product.
 *
 * @param [out] vl  Low word of the product.
 * @param [out] vh  High word of the product.
 * @param [out] vo  Overflow word; always zero on exit.
 * @param [in]  va  First multiplicand.
 * @param [in]  vb  Second multiplicand.
 */
4079
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
4080
    __asm__ __volatile__ (                               \
4081
        "dmultu %[a], %[b]    \n\t"            \
4082
        "mflo %[l]      \n\t"            \
4083
        "mfhi %[h]      \n\t"            \
4084
        "move %[o], $0    \n\t"            \
4085
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
4086
        : [a] "r" (va), [b] "r" (vb)                     \
4087
        : "$lo", "$hi"                                   \
4088
    )
4089
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is moved into scratch registers $10 (low) and $11 (high).
 * Each carry is derived with sltu into $12: after an addition, the sum
 * being less than the value just added means the addition wrapped.
 * Sequence: add low product word to vl, carry into vh, carry from that into
 * vo, add high product word to vh, carry into vo.
 * $10, $11, $12, LO and HI are declared clobbered.
 *
 * @param [in,out] vl  Low word of the three-word accumulator.
 * @param [in,out] vh  Middle word of the three-word accumulator.
 * @param [in,out] vo  High word of the three-word accumulator.
 * @param [in]     va  First multiplicand.
 * @param [in]     vb  Second multiplicand.
 */
4090
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
4091
    __asm__ __volatile__ (                               \
4092
        "dmultu %[a], %[b]    \n\t"            \
4093
        "mflo $10     \n\t"            \
4094
        "mfhi $11     \n\t"            \
4095
        "daddu  %[l], %[l], $10   \n\t"            \
4096
        "sltu $12, %[l], $10    \n\t"            \
4097
        "daddu  %[h], %[h], $12   \n\t"            \
4098
        "sltu $12, %[h], $12    \n\t"            \
4099
        "daddu  %[o], %[o], $12   \n\t"            \
4100
        "daddu  %[h], %[h], $11   \n\t"            \
4101
        "sltu $12, %[h], $11    \n\t"            \
4102
        "daddu  %[o], %[o], $12   \n\t"            \
4103
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4104
        : [a] "r" (va), [b] "r" (vb)                     \
4105
        : "$10", "$11", "$12", "$lo", "$hi"              \
4106
    )
4107
/* Multiply va by vb and add double size result into: vh | vl
 *
 * Two-word variant: the low product word ($10) is added to vl, the carry is
 * derived with sltu into $12, and the high product word ($11) plus that
 * carry are added to vh. A carry out of vh is not recorded.
 * $10, $11, $12, LO and HI are declared clobbered.
 *
 * @param [in,out] vl  Low word of the two-word accumulator.
 * @param [in,out] vh  High word of the two-word accumulator.
 * @param [in]     va  First multiplicand.
 * @param [in]     vb  Second multiplicand.
 */
4108
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
4109
    __asm__ __volatile__ (                               \
4110
        "dmultu %[a], %[b]    \n\t"            \
4111
        "mflo $10     \n\t"            \
4112
        "mfhi $11     \n\t"            \
4113
        "daddu  %[l], %[l], $10   \n\t"            \
4114
        "sltu $12, %[l], $10    \n\t"            \
4115
        "daddu  %[h], %[h], $11   \n\t"            \
4116
        "daddu  %[h], %[h], $12   \n\t"            \
4117
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4118
        : [a] "r" (va), [b] "r" (vb)                     \
4119
        : "$10", "$11", "$12", "$lo", "$hi"              \
4120
    )
4121
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into scratch registers $10 (low) and $11
 * (high); the full three-word add-with-carry sequence is then issued twice.
 * Carries are derived with sltu into $12 (sum less than the value just
 * added means the addition wrapped).
 * $10, $11, $12, LO and HI are declared clobbered.
 *
 * @param [in,out] vl  Low word of the three-word accumulator.
 * @param [in,out] vh  Middle word of the three-word accumulator.
 * @param [in,out] vo  High word of the three-word accumulator.
 * @param [in]     va  First multiplicand.
 * @param [in]     vb  Second multiplicand.
 */
4122
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
4123
    __asm__ __volatile__ (                               \
4124
        "dmultu %[a], %[b]    \n\t"            \
4125
        "mflo $10     \n\t"            \
4126
        "mfhi $11     \n\t"            \
4127
        "daddu  %[l], %[l], $10   \n\t"            \
4128
        "sltu $12, %[l], $10    \n\t"            \
4129
        "daddu  %[h], %[h], $12   \n\t"            \
4130
        "sltu $12, %[h], $12    \n\t"            \
4131
        "daddu  %[o], %[o], $12   \n\t"            \
4132
        "daddu  %[h], %[h], $11   \n\t"            \
4133
        "sltu $12, %[h], $11    \n\t"            \
4134
        "daddu  %[o], %[o], $12   \n\t"            \
4135
        "daddu  %[l], %[l], $10   \n\t"            \
4136
        "sltu $12, %[l], $10    \n\t"            \
4137
        "daddu  %[h], %[h], $12   \n\t"            \
4138
        "sltu $12, %[h], $12    \n\t"            \
4139
        "daddu  %[o], %[o], $12   \n\t"            \
4140
        "daddu  %[h], %[h], $11   \n\t"            \
4141
        "sltu $12, %[h], $11    \n\t"            \
4142
        "daddu  %[o], %[o], $12   \n\t"            \
4143
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4144
        : [a] "r" (va), [b] "r" (vb)                     \
4145
        : "$10", "$11", "$12", "$lo", "$hi"              \
4146
    )
4147
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once into scratch registers $10 (low) and $11
 * (high). The first add only propagates the carry from vl into vh and does
 * not touch vo; the second add runs the full sequence and carries into vo.
 * Carries are derived with sltu into $12.
 * $10, $11, $12, LO and HI are declared clobbered.
 *
 * @param [in,out] vl  Low word of the three-word accumulator.
 * @param [in,out] vh  Middle word of the three-word accumulator.
 * @param [in,out] vo  High word of the three-word accumulator.
 * @param [in]     va  First multiplicand.
 * @param [in]     vb  Second multiplicand.
 */
4150
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4151
    __asm__ __volatile__ (                               \
4152
        "dmultu %[a], %[b]    \n\t"            \
4153
        "mflo $10     \n\t"            \
4154
        "mfhi $11     \n\t"            \
4155
        "daddu  %[l], %[l], $10   \n\t"            \
4156
        "sltu $12, %[l], $10    \n\t"            \
4157
        "daddu  %[h], %[h], $11   \n\t"            \
4158
        "daddu  %[h], %[h], $12   \n\t"            \
4159
        "daddu  %[l], %[l], $10   \n\t"            \
4160
        "sltu $12, %[l], $10    \n\t"            \
4161
        "daddu  %[h], %[h], $12   \n\t"            \
4162
        "sltu $12, %[h], $12    \n\t"            \
4163
        "daddu  %[o], %[o], $12   \n\t"            \
4164
        "daddu  %[h], %[h], $11   \n\t"            \
4165
        "sltu $12, %[h], $11    \n\t"            \
4166
        "daddu  %[o], %[o], $12   \n\t"            \
4167
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4168
        : [a] "r" (va), [b] "r" (vb)                     \
4169
        : "$10", "$11", "$12", "$lo", "$hi"              \
4170
    )
4171
/* Square va and store double size result in: vh | vl
 *
 * dmultu is issued with va as both operands; mflo and mfhi copy the two
 * halves of the result out of LO/HI, which are declared clobbered.
 *
 * NOTE(review): vl and vh are only written by the instructions but are
 * declared "+r" (read-write), so the compiler treats their previous values
 * as inputs.
 *
 * @param [out] vl  Low word of the square.
 * @param [out] vh  High word of the square.
 * @param [in]  va  Word to square.
 */
4172
#define SP_ASM_SQR(vl, vh, va)                           \
4173
    __asm__ __volatile__ (                               \
4174
        "dmultu %[a], %[a]    \n\t"            \
4175
        "mflo %[l]      \n\t"            \
4176
        "mfhi %[h]      \n\t"            \
4177
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4178
        : [a] "r" (va)                                   \
4179
        : "$lo", "$hi"                                   \
4180
    )
4181
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is moved into scratch registers $10 (low) and $11 (high) and
 * added into the three-word accumulator. Carries are derived with sltu into
 * $12 (sum less than the value just added means the addition wrapped).
 * $10, $11, $12, LO and HI are declared clobbered.
 *
 * @param [in,out] vl  Low word of the three-word accumulator.
 * @param [in,out] vh  Middle word of the three-word accumulator.
 * @param [in,out] vo  High word of the three-word accumulator.
 * @param [in]     va  Word to square.
 */
4182
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4183
    __asm__ __volatile__ (                               \
4184
        "dmultu %[a], %[a]    \n\t"            \
4185
        "mflo $10     \n\t"            \
4186
        "mfhi $11     \n\t"            \
4187
        "daddu  %[l], %[l], $10   \n\t"            \
4188
        "sltu $12, %[l], $10    \n\t"            \
4189
        "daddu  %[h], %[h], $12   \n\t"            \
4190
        "sltu $12, %[h], $12    \n\t"            \
4191
        "daddu  %[o], %[o], $12   \n\t"            \
4192
        "daddu  %[h], %[h], $11   \n\t"            \
4193
        "sltu $12, %[h], $11    \n\t"            \
4194
        "daddu  %[o], %[o], $12   \n\t"            \
4195
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4196
        : [a] "r" (va)                                   \
4197
        : "$10", "$11", "$12", "$lo", "$hi"              \
4198
    )
4199
/* Square va and add double size result into: vh | vl
 *
 * Two-word variant: the low word of the square ($10) is added to vl, the
 * carry is derived with sltu into $12, and the high word ($11) plus that
 * carry are added to vh. A carry out of vh is not recorded.
 * $10, $11, $12, LO and HI are declared clobbered.
 *
 * @param [in,out] vl  Low word of the two-word accumulator.
 * @param [in,out] vh  High word of the two-word accumulator.
 * @param [in]     va  Word to square.
 */
4200
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4201
    __asm__ __volatile__ (                               \
4202
        "dmultu %[a], %[a]    \n\t"            \
4203
        "mflo $10     \n\t"            \
4204
        "mfhi $11     \n\t"            \
4205
        "daddu  %[l], %[l], $10   \n\t"            \
4206
        "sltu $12, %[l], $10    \n\t"            \
4207
        "daddu  %[h], %[h], $11   \n\t"            \
4208
        "daddu  %[h], %[h], $12   \n\t"            \
4209
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4210
        : [a] "r" (va)                                   \
4211
        : "$10", "$11", "$12", "$lo", "$hi"              \
4212
    )
4213
/* Add va into: vh | vl
 *
 * va is added to vl; sltu sets scratch register $12 to 1 when the sum is
 * less than va (the addition wrapped), and $12 is then added to vh.
 * $12 is declared clobbered.
 *
 * @param [in,out] vl  Low word of the two-word accumulator.
 * @param [in,out] vh  High word of the two-word accumulator.
 * @param [in]     va  Word to add.
 */
4214
#define SP_ASM_ADDC(vl, vh, va)                          \
4215
    __asm__ __volatile__ (                               \
4216
        "daddu  %[l], %[l], %[a]  \n\t"            \
4217
        "sltu $12, %[l], %[a]   \n\t"            \
4218
        "daddu  %[h], %[h], $12   \n\t"            \
4219
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4220
        : [a] "r" (va)                                   \
4221
        : "$12"                                          \
4222
    )
4223
/* Sub va from: vh | vl
 *
 * The original vl is saved in scratch register $12 before the subtraction.
 * sltu then sets $12 to 1 when the saved value is less than the new vl,
 * i.e. when the subtraction wrapped, and that borrow is subtracted from vh.
 * $12 is declared clobbered.
 *
 * @param [in,out] vl  Low word of the two-word value.
 * @param [in,out] vh  High word of the two-word value.
 * @param [in]     va  Word to subtract.
 */
4224
#define SP_ASM_SUBB(vl, vh, va)                          \
4225
    __asm__ __volatile__ (                               \
4226
        "move $12, %[l]   \n\t"            \
4227
        "dsubu  %[l], $12, %[a]   \n\t"            \
4228
        "sltu $12, $12, %[l]    \n\t"            \
4229
        "dsubu  %[h], %[h], $12   \n\t"            \
4230
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4231
        : [a] "r" (va)                                   \
4232
        : "$12"                                          \
4233
    )
4234
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The same nine-instruction three-word addition is issued twice. In each
 * pass: va is added to vl, the carry goes into vh, the carry from that goes
 * into vo, vb is added to vh, and vc plus the resulting carry are added to
 * vo. Carries are derived with sltu into scratch register $12, which is
 * declared clobbered.
 *
 * @param [in,out] vl  Low word of the three-word accumulator.
 * @param [in,out] vh  Middle word of the three-word accumulator.
 * @param [in,out] vo  High word of the three-word accumulator.
 * @param [in]     va  Low word of the value to add twice.
 * @param [in]     vb  Middle word of the value to add twice.
 * @param [in]     vc  High word of the value to add twice.
 */
4235
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4236
    __asm__ __volatile__ (                               \
4237
        "daddu  %[l], %[l], %[a]  \n\t"            \
4238
        "sltu $12, %[l], %[a]   \n\t"            \
4239
        "daddu  %[h], %[h], $12   \n\t"            \
4240
        "sltu $12, %[h], $12    \n\t"            \
4241
        "daddu  %[o], %[o], $12   \n\t"            \
4242
        "daddu  %[h], %[h], %[b]  \n\t"            \
4243
        "sltu $12, %[h], %[b]   \n\t"            \
4244
        "daddu  %[o], %[o], %[c]  \n\t"            \
4245
        "daddu  %[o], %[o], $12   \n\t"            \
4246
        "daddu  %[l], %[l], %[a]  \n\t"            \
4247
        "sltu $12, %[l], %[a]   \n\t"            \
4248
        "daddu  %[h], %[h], $12   \n\t"            \
4249
        "sltu $12, %[h], $12    \n\t"            \
4250
        "daddu  %[o], %[o], $12   \n\t"            \
4251
        "daddu  %[h], %[h], %[b]  \n\t"            \
4252
        "sltu $12, %[h], %[b]   \n\t"            \
4253
        "daddu  %[o], %[o], %[c]  \n\t"            \
4254
        "daddu  %[o], %[o], $12   \n\t"            \
4255
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4256
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4257
        : "$12"                                          \
4258
    )
4259
4260
#define SP_INT_ASM_AVAILABLE
4261
4262
    #endif /* WOLFSSL_SP_MIPS64 && SP_WORD_SIZE == 64 */
4263
4264
    #if defined(WOLFSSL_SP_MIPS) && SP_WORD_SIZE == 32
4265
/*
4266
 * CPU: MIPS 32-bit
4267
 */
4268
4269
/* Multiply va by vb and store double size result in: vh | vl
 *
 * multu leaves the unsigned product in the LO/HI register pair; mflo and
 * mfhi copy the two halves out. LO and HI are declared clobbered.
 *
 * NOTE(review): vl and vh are only written by the instructions but are
 * declared "+r" (read-write), so the compiler treats their previous values
 * as inputs.
 *
 * @param [out] vl  Low word of the product.
 * @param [out] vh  High word of the product.
 * @param [in]  va  First multiplicand.
 * @param [in]  vb  Second multiplicand.
 */
4270
#define SP_ASM_MUL(vl, vh, va, vb)                       \
4271
    __asm__ __volatile__ (                               \
4272
        "multu  %[a], %[b]    \n\t"            \
4273
        "mflo %[l]      \n\t"            \
4274
        "mfhi %[h]      \n\t"            \
4275
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4276
        : [a] "r" (va), [b] "r" (vb)                     \
4277
        : "%lo", "%hi"                                   \
4278
    )
4279
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The two-word product is read from LO/HI into vl and vh, and vo is set to
 * zero by copying register $0, so the three words together hold the
 * product.
 *
 * @param [out] vl  Low word of the product.
 * @param [out] vh  High word of the product.
 * @param [out] vo  Overflow word; always zero on exit.
 * @param [in]  va  First multiplicand.
 * @param [in]  vb  Second multiplicand.
 */
4280
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
4281
    __asm__ __volatile__ (                               \
4282
        "multu  %[a], %[b]    \n\t"            \
4283
        "mflo %[l]      \n\t"            \
4284
        "mfhi %[h]      \n\t"            \
4285
        "move %[o], $0    \n\t"            \
4286
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
4287
        : [a] "r" (va), [b] "r" (vb)                     \
4288
        : "%lo", "%hi"                                   \
4289
    )
4290
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is moved into scratch registers $10 (low) and $11 (high).
 * Each carry is derived with sltu into $12: after an addition, the sum
 * being less than the value just added means the addition wrapped.
 * Sequence: add low product word to vl, carry into vh, carry from that into
 * vo, add high product word to vh, carry into vo.
 * $10, $11, $12, LO and HI are declared clobbered.
 *
 * @param [in,out] vl  Low word of the three-word accumulator.
 * @param [in,out] vh  Middle word of the three-word accumulator.
 * @param [in,out] vo  High word of the three-word accumulator.
 * @param [in]     va  First multiplicand.
 * @param [in]     vb  Second multiplicand.
 */
4291
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
4292
    __asm__ __volatile__ (                               \
4293
        "multu  %[a], %[b]    \n\t"            \
4294
        "mflo $10     \n\t"            \
4295
        "mfhi $11     \n\t"            \
4296
        "addu %[l], %[l], $10   \n\t"            \
4297
        "sltu $12, %[l], $10    \n\t"            \
4298
        "addu %[h], %[h], $12   \n\t"            \
4299
        "sltu $12, %[h], $12    \n\t"            \
4300
        "addu %[o], %[o], $12   \n\t"            \
4301
        "addu %[h], %[h], $11   \n\t"            \
4302
        "sltu $12, %[h], $11    \n\t"            \
4303
        "addu %[o], %[o], $12   \n\t"            \
4304
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4305
        : [a] "r" (va), [b] "r" (vb)                     \
4306
        : "$10", "$11", "$12", "%lo", "%hi"              \
4307
    )
4308
/* Multiply va by vb and add double size result into: vh | vl
 *
 * Two-word variant: the low product word ($10) is added to vl, the carry is
 * derived with sltu into $12, and the high product word ($11) plus that
 * carry are added to vh. A carry out of vh is not recorded.
 * $10, $11, $12, LO and HI are declared clobbered.
 *
 * @param [in,out] vl  Low word of the two-word accumulator.
 * @param [in,out] vh  High word of the two-word accumulator.
 * @param [in]     va  First multiplicand.
 * @param [in]     vb  Second multiplicand.
 */
4309
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
4310
    __asm__ __volatile__ (                               \
4311
        "multu  %[a], %[b]    \n\t"            \
4312
        "mflo $10     \n\t"            \
4313
        "mfhi $11     \n\t"            \
4314
        "addu %[l], %[l], $10   \n\t"            \
4315
        "sltu $12, %[l], $10    \n\t"            \
4316
        "addu %[h], %[h], $11   \n\t"            \
4317
        "addu %[h], %[h], $12   \n\t"            \
4318
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4319
        : [a] "r" (va), [b] "r" (vb)                     \
4320
        : "$10", "$11", "$12", "%lo", "%hi"              \
4321
    )
4322
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into scratch registers $10 (low) and $11
 * (high); the full three-word add-with-carry sequence is then issued twice.
 * Carries are derived with sltu into $12 (sum less than the value just
 * added means the addition wrapped).
 * $10, $11, $12, LO and HI are declared clobbered.
 *
 * @param [in,out] vl  Low word of the three-word accumulator.
 * @param [in,out] vh  Middle word of the three-word accumulator.
 * @param [in,out] vo  High word of the three-word accumulator.
 * @param [in]     va  First multiplicand.
 * @param [in]     vb  Second multiplicand.
 */
4323
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
4324
    __asm__ __volatile__ (                               \
4325
        "multu  %[a], %[b]    \n\t"            \
4326
        "mflo $10     \n\t"            \
4327
        "mfhi $11     \n\t"            \
4328
        "addu %[l], %[l], $10   \n\t"            \
4329
        "sltu $12, %[l], $10    \n\t"            \
4330
        "addu %[h], %[h], $12   \n\t"            \
4331
        "sltu $12, %[h], $12    \n\t"            \
4332
        "addu %[o], %[o], $12   \n\t"            \
4333
        "addu %[h], %[h], $11   \n\t"            \
4334
        "sltu $12, %[h], $11    \n\t"            \
4335
        "addu %[o], %[o], $12   \n\t"            \
4336
        "addu %[l], %[l], $10   \n\t"            \
4337
        "sltu $12, %[l], $10    \n\t"            \
4338
        "addu %[h], %[h], $12   \n\t"            \
4339
        "sltu $12, %[h], $12    \n\t"            \
4340
        "addu %[o], %[o], $12   \n\t"            \
4341
        "addu %[h], %[h], $11   \n\t"            \
4342
        "sltu $12, %[h], $11    \n\t"            \
4343
        "addu %[o], %[o], $12   \n\t"            \
4344
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4345
        : [a] "r" (va), [b] "r" (vb)                     \
4346
        : "$10", "$11", "$12", "%lo", "%hi"              \
4347
    )
4348
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once into scratch registers $10 (low) and $11
 * (high). The first add only propagates the carry from vl into vh and does
 * not touch vo; the second add runs the full sequence and carries into vo.
 * Carries are derived with sltu into $12.
 * $10, $11, $12, LO and HI are declared clobbered.
 *
 * @param [in,out] vl  Low word of the three-word accumulator.
 * @param [in,out] vh  Middle word of the three-word accumulator.
 * @param [in,out] vo  High word of the three-word accumulator.
 * @param [in]     va  First multiplicand.
 * @param [in]     vb  Second multiplicand.
 */
4351
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4352
    __asm__ __volatile__ (                               \
4353
        "multu  %[a], %[b]    \n\t"            \
4354
        "mflo $10     \n\t"            \
4355
        "mfhi $11     \n\t"            \
4356
        "addu %[l], %[l], $10   \n\t"            \
4357
        "sltu $12, %[l], $10    \n\t"            \
4358
        "addu %[h], %[h], $11   \n\t"            \
4359
        "addu %[h], %[h], $12   \n\t"            \
4360
        "addu %[l], %[l], $10   \n\t"            \
4361
        "sltu $12, %[l], $10    \n\t"            \
4362
        "addu %[h], %[h], $12   \n\t"            \
4363
        "sltu $12, %[h], $12    \n\t"            \
4364
        "addu %[o], %[o], $12   \n\t"            \
4365
        "addu %[h], %[h], $11   \n\t"            \
4366
        "sltu $12, %[h], $11    \n\t"            \
4367
        "addu %[o], %[o], $12   \n\t"            \
4368
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4369
        : [a] "r" (va), [b] "r" (vb)                     \
4370
        : "$10", "$11", "$12", "%lo", "%hi"              \
4371
    )
4372
/* Square va and store double size result in: vh | vl
 *
 * multu is issued with va as both operands; mflo and mfhi copy the two
 * halves of the result out of LO/HI, which are declared clobbered.
 *
 * NOTE(review): vl and vh are only written by the instructions but are
 * declared "+r" (read-write), so the compiler treats their previous values
 * as inputs.
 *
 * @param [out] vl  Low word of the square.
 * @param [out] vh  High word of the square.
 * @param [in]  va  Word to square.
 */
4373
#define SP_ASM_SQR(vl, vh, va)                           \
4374
    __asm__ __volatile__ (                               \
4375
        "multu  %[a], %[a]    \n\t"            \
4376
        "mflo %[l]      \n\t"            \
4377
        "mfhi %[h]      \n\t"            \
4378
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4379
        : [a] "r" (va)                                   \
4380
        : "%lo", "%hi"                                   \
4381
    )
4382
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is moved into scratch registers $10 (low) and $11 (high) and
 * added into the three-word accumulator. Carries are derived with sltu into
 * $12 (sum less than the value just added means the addition wrapped).
 * $10, $11, $12, LO and HI are declared clobbered.
 *
 * @param [in,out] vl  Low word of the three-word accumulator.
 * @param [in,out] vh  Middle word of the three-word accumulator.
 * @param [in,out] vo  High word of the three-word accumulator.
 * @param [in]     va  Word to square.
 */
4383
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4384
    __asm__ __volatile__ (                               \
4385
        "multu  %[a], %[a]    \n\t"            \
4386
        "mflo $10     \n\t"            \
4387
        "mfhi $11     \n\t"            \
4388
        "addu %[l], %[l], $10   \n\t"            \
4389
        "sltu $12, %[l], $10    \n\t"            \
4390
        "addu %[h], %[h], $12   \n\t"            \
4391
        "sltu $12, %[h], $12    \n\t"            \
4392
        "addu %[o], %[o], $12   \n\t"            \
4393
        "addu %[h], %[h], $11   \n\t"            \
4394
        "sltu $12, %[h], $11    \n\t"            \
4395
        "addu %[o], %[o], $12   \n\t"            \
4396
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4397
        : [a] "r" (va)                                   \
4398
        : "$10", "$11", "$12", "%lo", "%hi"              \
4399
    )
4400
/* Square va and add double size result into: vh | vl
 *
 * Two-word variant: the low word of the square ($10) is added to vl, the
 * carry is derived with sltu into $12, and the high word ($11) plus that
 * carry are added to vh. A carry out of vh is not recorded.
 * $10, $11, $12, LO and HI are declared clobbered.
 *
 * @param [in,out] vl  Low word of the two-word accumulator.
 * @param [in,out] vh  High word of the two-word accumulator.
 * @param [in]     va  Word to square.
 */
4401
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4402
    __asm__ __volatile__ (                               \
4403
        "multu  %[a], %[a]    \n\t"            \
4404
        "mflo $10     \n\t"            \
4405
        "mfhi $11     \n\t"            \
4406
        "addu %[l], %[l], $10   \n\t"            \
4407
        "sltu $12, %[l], $10    \n\t"            \
4408
        "addu %[h], %[h], $11   \n\t"            \
4409
        "addu %[h], %[h], $12   \n\t"            \
4410
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4411
        : [a] "r" (va)                                   \
4412
        : "$10", "$11", "$12", "%lo", "%hi"              \
4413
    )
4414
/* Add va into: vh | vl
 *
 * va is added to vl; sltu sets scratch register $12 to 1 when the sum is
 * less than va (the addition wrapped), and $12 is then added to vh.
 * $12 is declared clobbered.
 *
 * @param [in,out] vl  Low word of the two-word accumulator.
 * @param [in,out] vh  High word of the two-word accumulator.
 * @param [in]     va  Word to add.
 */
4415
#define SP_ASM_ADDC(vl, vh, va)                          \
4416
    __asm__ __volatile__ (                               \
4417
        "addu %[l], %[l], %[a]  \n\t"            \
4418
        "sltu $12, %[l], %[a]   \n\t"            \
4419
        "addu %[h], %[h], $12   \n\t"            \
4420
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4421
        : [a] "r" (va)                                   \
4422
        : "$12"                                          \
4423
    )
4424
/* Sub va from: vh | vl
 *
 * The original vl is saved in scratch register $12 before the subtraction.
 * sltu then sets $12 to 1 when the saved value is less than the new vl,
 * i.e. when the subtraction wrapped, and that borrow is subtracted from vh.
 * $12 is declared clobbered.
 *
 * @param [in,out] vl  Low word of the two-word value.
 * @param [in,out] vh  High word of the two-word value.
 * @param [in]     va  Word to subtract.
 */
4425
#define SP_ASM_SUBB(vl, vh, va)                          \
4426
    __asm__ __volatile__ (                               \
4427
        "move $12, %[l]   \n\t"            \
4428
        "subu %[l], $12, %[a]   \n\t"            \
4429
        "sltu $12, $12, %[l]    \n\t"            \
4430
        "subu %[h], %[h], $12   \n\t"            \
4431
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4432
        : [a] "r" (va)                                   \
4433
        : "$12"                                          \
4434
    )
4435
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The same nine-instruction three-word addition is issued twice. In each
 * pass: va is added to vl, the carry goes into vh, the carry from that goes
 * into vo, vb is added to vh, and vc plus the resulting carry are added to
 * vo. Carries are derived with sltu into scratch register $12, which is
 * declared clobbered.
 *
 * @param [in,out] vl  Low word of the three-word accumulator.
 * @param [in,out] vh  Middle word of the three-word accumulator.
 * @param [in,out] vo  High word of the three-word accumulator.
 * @param [in]     va  Low word of the value to add twice.
 * @param [in]     vb  Middle word of the value to add twice.
 * @param [in]     vc  High word of the value to add twice.
 */
4436
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4437
    __asm__ __volatile__ (                               \
4438
        "addu %[l], %[l], %[a]  \n\t"            \
4439
        "sltu $12, %[l], %[a]   \n\t"            \
4440
        "addu %[h], %[h], $12   \n\t"            \
4441
        "sltu $12, %[h], $12    \n\t"            \
4442
        "addu %[o], %[o], $12   \n\t"            \
4443
        "addu %[h], %[h], %[b]  \n\t"            \
4444
        "sltu $12, %[h], %[b]   \n\t"            \
4445
        "addu %[o], %[o], %[c]  \n\t"            \
4446
        "addu %[o], %[o], $12   \n\t"            \
4447
        "addu %[l], %[l], %[a]  \n\t"            \
4448
        "sltu $12, %[l], %[a]   \n\t"            \
4449
        "addu %[h], %[h], $12   \n\t"            \
4450
        "sltu $12, %[h], $12    \n\t"            \
4451
        "addu %[o], %[o], $12   \n\t"            \
4452
        "addu %[h], %[h], %[b]  \n\t"            \
4453
        "sltu $12, %[h], %[b]   \n\t"            \
4454
        "addu %[o], %[o], %[c]  \n\t"            \
4455
        "addu %[o], %[o], $12   \n\t"            \
4456
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4457
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4458
        : "$12"                                          \
4459
    )
4460
4461
#define SP_INT_ASM_AVAILABLE
4462
4463
    #endif /* WOLFSSL_SP_MIPS && SP_WORD_SIZE == 32 */
4464
4465
    #if defined(WOLFSSL_SP_RISCV64) && SP_WORD_SIZE == 64
4466
/*
4467
 * CPU: RISCV 64-bit
4468
 */
4469
4470
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mul produces the low word of the product and mulhu the high word of the
 * unsigned product.
 *
 * NOTE(review): vl and vh are only written by the instructions but are
 * declared "+r" (read-write), so the compiler treats their previous values
 * as inputs.
 *
 * @param [out] vl  Low word of the product.
 * @param [out] vh  High word of the product.
 * @param [in]  va  First multiplicand.
 * @param [in]  vb  Second multiplicand.
 */
4471
#define SP_ASM_MUL(vl, vh, va, vb)                       \
4472
    __asm__ __volatile__ (                               \
4473
        "mul  %[l], %[a], %[b]  \n\t"            \
4474
        "mulhu  %[h], %[a], %[b]  \n\t"            \
4475
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4476
        : [a] "r" (va), [b] "r" (vb)                     \
4477
        :                                                \
4478
    )
4479
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The two-word product goes to vh | vl and vo is set to zero by adding the
 * zero register to itself, so the three words together hold the product.
 *
 * @param [out] vl  Low word of the product.
 * @param [out] vh  High word of the product.
 * @param [out] vo  Overflow word; always zero on exit.
 * @param [in]  va  First multiplicand.
 * @param [in]  vb  Second multiplicand.
 */
4480
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
4481
    __asm__ __volatile__ (                               \
4482
        "mulhu  %[h], %[a], %[b]  \n\t"            \
4483
        "mul  %[l], %[a], %[b]  \n\t"            \
4484
        "add  %[o], zero, zero  \n\t"            \
4485
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
4486
        : [a] "r" (va), [b] "r" (vb)                     \
4487
        :                                                \
4488
    )
4489
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is formed in scratch registers a5 (low) and a6 (high). Each
 * carry is derived with sltu into a7: after an addition, the sum being less
 * than the value just added means the addition wrapped.
 * Sequence: add low product word to vl, carry into vh, carry from that into
 * vo, add high product word to vh, carry into vo.
 * a5, a6 and a7 are declared clobbered.
 *
 * @param [in,out] vl  Low word of the three-word accumulator.
 * @param [in,out] vh  Middle word of the three-word accumulator.
 * @param [in,out] vo  High word of the three-word accumulator.
 * @param [in]     va  First multiplicand.
 * @param [in]     vb  Second multiplicand.
 */
4490
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
4491
    __asm__ __volatile__ (                               \
4492
        "mul  a5, %[a], %[b]    \n\t"            \
4493
        "mulhu  a6, %[a], %[b]    \n\t"            \
4494
        "add  %[l], %[l], a5    \n\t"            \
4495
        "sltu a7, %[l], a5    \n\t"            \
4496
        "add  %[h], %[h], a7    \n\t"            \
4497
        "sltu a7, %[h], a7    \n\t"            \
4498
        "add  %[o], %[o], a7    \n\t"            \
4499
        "add  %[h], %[h], a6    \n\t"            \
4500
        "sltu a7, %[h], a6    \n\t"            \
4501
        "add  %[o], %[o], a7    \n\t"            \
4502
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4503
        : [a] "r" (va), [b] "r" (vb)                     \
4504
        : "a5", "a6", "a7"                               \
4505
    )
4506
/* Multiply va by vb and add double size result into: vh | vl
 *
 * Two-word variant: the low product word (a5) is added to vl, the carry is
 * derived with sltu into a7, and the high product word (a6) plus that carry
 * are added to vh. A carry out of vh is not recorded.
 * a5, a6 and a7 are declared clobbered.
 *
 * @param [in,out] vl  Low word of the two-word accumulator.
 * @param [in,out] vh  High word of the two-word accumulator.
 * @param [in]     va  First multiplicand.
 * @param [in]     vb  Second multiplicand.
 */
4507
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
4508
    __asm__ __volatile__ (                               \
4509
        "mul  a5, %[a], %[b]    \n\t"            \
4510
        "mulhu  a6, %[a], %[b]    \n\t"            \
4511
        "add  %[l], %[l], a5    \n\t"            \
4512
        "sltu a7, %[l], a5    \n\t"            \
4513
        "add  %[h], %[h], a6    \n\t"            \
4514
        "add  %[h], %[h], a7    \n\t"            \
4515
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4516
        : [a] "r" (va), [b] "r" (vb)                     \
4517
        : "a5", "a6", "a7"                               \
4518
    )
4519
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into a5 (low) and a6 (high); the same
 * three-word add-with-carry sequence is then executed two times, with a7
 * holding each recovered carry.
 * Clobbers a5, a6 and a7.
 */
4520
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
4521
    __asm__ __volatile__ (                               \
4522
        "mul  a5, %[a], %[b]    \n\t"            \
4523
        "mulhu  a6, %[a], %[b]    \n\t"            \
4524
        "add  %[l], %[l], a5    \n\t"            \
4525
        "sltu a7, %[l], a5    \n\t"            \
4526
        "add  %[h], %[h], a7    \n\t"            \
4527
        "sltu a7, %[h], a7    \n\t"            \
4528
        "add  %[o], %[o], a7    \n\t"            \
4529
        "add  %[h], %[h], a6    \n\t"            \
4530
        "sltu a7, %[h], a6    \n\t"            \
4531
        "add  %[o], %[o], a7    \n\t"            \
4532
        "add  %[l], %[l], a5    \n\t"            \
4533
        "sltu a7, %[l], a5    \n\t"            \
4534
        "add  %[h], %[h], a7    \n\t"            \
4535
        "sltu a7, %[h], a7    \n\t"            \
4536
        "add  %[o], %[o], a7    \n\t"            \
4537
        "add  %[h], %[h], a6    \n\t"            \
4538
        "sltu a7, %[h], a6    \n\t"            \
4539
        "add  %[o], %[o], a7    \n\t"            \
4540
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4541
        : [a] "r" (va), [b] "r" (vb)                     \
4542
        : "a5", "a6", "a7"                               \
4543
    )
4544
/* Multiply va by vb and add double size result twice into: vo | vh | vl
4545
 * Assumes first add will not overflow vh | vl
 *
 * The first add only carries from vl into vh; the second add also propagates
 * carries into vo. The product is held in a5 (low) and a6 (high) and a7 holds
 * each recovered carry.
 * Clobbers a5, a6 and a7.
4546
 */
4547
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4548
    __asm__ __volatile__ (                               \
4549
        "mul  a5, %[a], %[b]    \n\t"            \
4550
        "mulhu  a6, %[a], %[b]    \n\t"            \
4551
        "add  %[l], %[l], a5    \n\t"            \
4552
        "sltu a7, %[l], a5    \n\t"            \
4553
        "add  %[h], %[h], a6    \n\t"            \
4554
        "add  %[h], %[h], a7    \n\t"            \
4555
        "add  %[l], %[l], a5    \n\t"            \
4556
        "sltu a7, %[l], a5    \n\t"            \
4557
        "add  %[h], %[h], a7    \n\t"            \
4558
        "sltu a7, %[h], a7    \n\t"            \
4559
        "add  %[o], %[o], a7    \n\t"            \
4560
        "add  %[h], %[h], a6    \n\t"            \
4561
        "sltu a7, %[h], a6    \n\t"            \
4562
        "add  %[o], %[o], a7    \n\t"            \
4563
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4564
        : [a] "r" (va), [b] "r" (vb)                     \
4565
        : "a5", "a6", "a7"                               \
4566
    )
4567
/* Square va and store double size result in: vh | vl
 *
 * Uses "mul" for the low word and "mulhu" for the high word with va as both
 * operands. No scratch registers are clobbered.
 * Note: vl and vh are bound as read-write ("+r") operands even though the
 * instructions only write them.
 */
4568
#define SP_ASM_SQR(vl, vh, va)                           \
4569
    __asm__ __volatile__ (                               \
4570
        "mul  %[l], %[a], %[a]  \n\t"            \
4571
        "mulhu  %[h], %[a], %[a]  \n\t"            \
4572
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4573
        : [a] "r" (va)                                   \
4574
        :                                                \
4575
    )
4576
/* Square va and add double size result into: vo | vh | vl
 *
 * a5 and a6 receive the low and high words of va * va. After each add,
 * "sltu" recovers the carry into a7, which is added to the next word up.
 * Clobbers a5, a6 and a7.
 */
4577
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4578
    __asm__ __volatile__ (                               \
4579
        "mul  a5, %[a], %[a]    \n\t"            \
4580
        "mulhu  a6, %[a], %[a]    \n\t"            \
4581
        "add  %[l], %[l], a5    \n\t"            \
4582
        "sltu a7, %[l], a5    \n\t"            \
4583
        "add  %[h], %[h], a7    \n\t"            \
4584
        "sltu a7, %[h], a7    \n\t"            \
4585
        "add  %[o], %[o], a7    \n\t"            \
4586
        "add  %[h], %[h], a6    \n\t"            \
4587
        "sltu a7, %[h], a6    \n\t"            \
4588
        "add  %[o], %[o], a7    \n\t"            \
4589
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4590
        : [a] "r" (va)                                   \
4591
        : "a5", "a6", "a7"                               \
4592
    )
4593
/* Square va and add double size result into: vh | vl
 *
 * The carry out of vl (recovered with "sltu" into a7) is added into vh; a
 * carry out of vh is not captured.
 * Clobbers a5, a6 and a7.
 */
4594
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4595
    __asm__ __volatile__ (                               \
4596
        "mul  a5, %[a], %[a]    \n\t"            \
4597
        "mulhu  a6, %[a], %[a]    \n\t"            \
4598
        "add  %[l], %[l], a5    \n\t"            \
4599
        "sltu a7, %[l], a5    \n\t"            \
4600
        "add  %[h], %[h], a6    \n\t"            \
4601
        "add  %[h], %[h], a7    \n\t"            \
4602
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4603
        : [a] "r" (va)                                   \
4604
        : "a5", "a6", "a7"                               \
4605
    )
4606
/* Add va into: vh | vl
 *
 * The carry out of the low word is recovered with "sltu" (sum < addend) into
 * a7 and then added to vh.
 * Clobbers a7.
 */
4607
#define SP_ASM_ADDC(vl, vh, va)                          \
4608
    __asm__ __volatile__ (                               \
4609
        "add  %[l], %[l], %[a]  \n\t"            \
4610
        "sltu a7, %[l], %[a]    \n\t"            \
4611
        "add  %[h], %[h], a7    \n\t"            \
4612
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4613
        : [a] "r" (va)                                   \
4614
        : "a7"                                           \
4615
    )
4616
/* Sub va from: vh | vl
 *
 * The original vl is first copied to a7 so that the borrow can be detected
 * after the subtraction (original < difference); that borrow is then
 * subtracted from vh.
 * Clobbers a7.
 */
4617
#define SP_ASM_SUBB(vl, vh, va)                          \
4618
    __asm__ __volatile__ (                               \
4619
        "add  a7, %[l], zero    \n\t"            \
4620
        "sub  %[l], a7, %[a]    \n\t"            \
4621
        "sltu a7, a7, %[l]    \n\t"            \
4622
        "sub  %[h], %[h], a7    \n\t"            \
4623
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4624
        : [a] "r" (va)                                   \
4625
        : "a7"                                           \
4626
    )
4627
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word add is performed twice in sequence. Carries out of vl and
 * vh are recovered with "sltu" into a7 and added to the next word up; no
 * carry out of vo is captured.
 * Clobbers a7.
 */
4628
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4629
    __asm__ __volatile__ (                               \
4630
        "add  %[l], %[l], %[a]  \n\t"            \
4631
        "sltu a7, %[l], %[a]    \n\t"            \
4632
        "add  %[h], %[h], a7    \n\t"            \
4633
        "sltu a7, %[h], a7    \n\t"            \
4634
        "add  %[o], %[o], a7    \n\t"            \
4635
        "add  %[h], %[h], %[b]  \n\t"            \
4636
        "sltu a7, %[h], %[b]    \n\t"            \
4637
        "add  %[o], %[o], %[c]  \n\t"            \
4638
        "add  %[o], %[o], a7    \n\t"            \
4639
        "add  %[l], %[l], %[a]  \n\t"            \
4640
        "sltu a7, %[l], %[a]    \n\t"            \
4641
        "add  %[h], %[h], a7    \n\t"            \
4642
        "sltu a7, %[h], a7    \n\t"            \
4643
        "add  %[o], %[o], a7    \n\t"            \
4644
        "add  %[h], %[h], %[b]  \n\t"            \
4645
        "sltu a7, %[h], %[b]    \n\t"            \
4646
        "add  %[o], %[o], %[c]  \n\t"            \
4647
        "add  %[o], %[o], a7    \n\t"            \
4648
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4649
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4650
        : "a7"                                           \
4651
    )
4652
4653
#define SP_INT_ASM_AVAILABLE
4654
4655
    #endif /* WOLFSSL_SP_RISCV64 && SP_WORD_SIZE == 64 */
4656
4657
    #if defined(WOLFSSL_SP_RISCV32) && SP_WORD_SIZE == 32
4658
/*
4659
 * CPU: RISCV 32-bit
4660
 */
4661
4662
/* Multiply va by vb and store double size result in: vh | vl
 *
 * "mul" writes the low word and "mulhu" the high word of the unsigned
 * product. No scratch registers are clobbered.
 * Note: vl and vh are bound as read-write ("+r") operands even though the
 * instructions only write them.
 */
4663
#define SP_ASM_MUL(vl, vh, va, vb)                       \
4664
    __asm__ __volatile__ (                               \
4665
        "mul  %[l], %[a], %[b]  \n\t"            \
4666
        "mulhu  %[h], %[a], %[b]  \n\t"            \
4667
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4668
        : [a] "r" (va), [b] "r" (vb)                     \
4669
        :                                                \
4670
    )
4671
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The high word is produced first ("mulhu"), then the low word ("mul"), and
 * vo is cleared last by adding the zero register to itself.
 * No scratch registers are clobbered.
 */
4672
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
4673
    __asm__ __volatile__ (                               \
4674
        "mulhu  %[h], %[a], %[b]  \n\t"            \
4675
        "mul  %[l], %[a], %[b]  \n\t"            \
4676
        "add  %[o], zero, zero  \n\t"            \
4677
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
4678
        : [a] "r" (va), [b] "r" (vb)                     \
4679
        :                                                \
4680
    )
4681
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * a5 and a6 receive the low and high product words. After each add, "sltu"
 * compares the sum with the value just added to recover the carry in a7,
 * which is then added to the next word up.
 * Clobbers a5, a6 and a7.
 */
4682
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
4683
    __asm__ __volatile__ (                               \
4684
        "mul  a5, %[a], %[b]    \n\t"            \
4685
        "mulhu  a6, %[a], %[b]    \n\t"            \
4686
        "add  %[l], %[l], a5    \n\t"            \
4687
        "sltu a7, %[l], a5    \n\t"            \
4688
        "add  %[h], %[h], a7    \n\t"            \
4689
        "sltu a7, %[h], a7    \n\t"            \
4690
        "add  %[o], %[o], a7    \n\t"            \
4691
        "add  %[h], %[h], a6    \n\t"            \
4692
        "sltu a7, %[h], a6    \n\t"            \
4693
        "add  %[o], %[o], a7    \n\t"            \
4694
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4695
        : [a] "r" (va), [b] "r" (vb)                     \
4696
        : "a5", "a6", "a7"                               \
4697
    )
4698
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The carry out of vl (recovered with "sltu" into a7) is added into vh; a
 * carry out of vh is not captured.
 * Clobbers a5, a6 and a7.
 */
4699
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
4700
    __asm__ __volatile__ (                               \
4701
        "mul  a5, %[a], %[b]    \n\t"            \
4702
        "mulhu  a6, %[a], %[b]    \n\t"            \
4703
        "add  %[l], %[l], a5    \n\t"            \
4704
        "sltu a7, %[l], a5    \n\t"            \
4705
        "add  %[h], %[h], a6    \n\t"            \
4706
        "add  %[h], %[h], a7    \n\t"            \
4707
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4708
        : [a] "r" (va), [b] "r" (vb)                     \
4709
        : "a5", "a6", "a7"                               \
4710
    )
4711
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into a5 (low) and a6 (high); the same
 * three-word add-with-carry sequence is then executed two times, with a7
 * holding each recovered carry.
 * Clobbers a5, a6 and a7.
 */
4712
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
4713
    __asm__ __volatile__ (                               \
4714
        "mul  a5, %[a], %[b]    \n\t"            \
4715
        "mulhu  a6, %[a], %[b]    \n\t"            \
4716
        "add  %[l], %[l], a5    \n\t"            \
4717
        "sltu a7, %[l], a5    \n\t"            \
4718
        "add  %[h], %[h], a7    \n\t"            \
4719
        "sltu a7, %[h], a7    \n\t"            \
4720
        "add  %[o], %[o], a7    \n\t"            \
4721
        "add  %[h], %[h], a6    \n\t"            \
4722
        "sltu a7, %[h], a6    \n\t"            \
4723
        "add  %[o], %[o], a7    \n\t"            \
4724
        "add  %[l], %[l], a5    \n\t"            \
4725
        "sltu a7, %[l], a5    \n\t"            \
4726
        "add  %[h], %[h], a7    \n\t"            \
4727
        "sltu a7, %[h], a7    \n\t"            \
4728
        "add  %[o], %[o], a7    \n\t"            \
4729
        "add  %[h], %[h], a6    \n\t"            \
4730
        "sltu a7, %[h], a6    \n\t"            \
4731
        "add  %[o], %[o], a7    \n\t"            \
4732
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4733
        : [a] "r" (va), [b] "r" (vb)                     \
4734
        : "a5", "a6", "a7"                               \
4735
    )
4736
/* Multiply va by vb and add double size result twice into: vo | vh | vl
4737
 * Assumes first add will not overflow vh | vl
 *
 * The first add only carries from vl into vh; the second add also propagates
 * carries into vo. The product is held in a5 (low) and a6 (high) and a7 holds
 * each recovered carry.
 * Clobbers a5, a6 and a7.
4738
 */
4739
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4740
    __asm__ __volatile__ (                               \
4741
        "mul  a5, %[a], %[b]    \n\t"            \
4742
        "mulhu  a6, %[a], %[b]    \n\t"            \
4743
        "add  %[l], %[l], a5    \n\t"            \
4744
        "sltu a7, %[l], a5    \n\t"            \
4745
        "add  %[h], %[h], a6    \n\t"            \
4746
        "add  %[h], %[h], a7    \n\t"            \
4747
        "add  %[l], %[l], a5    \n\t"            \
4748
        "sltu a7, %[l], a5    \n\t"            \
4749
        "add  %[h], %[h], a7    \n\t"            \
4750
        "sltu a7, %[h], a7    \n\t"            \
4751
        "add  %[o], %[o], a7    \n\t"            \
4752
        "add  %[h], %[h], a6    \n\t"            \
4753
        "sltu a7, %[h], a6    \n\t"            \
4754
        "add  %[o], %[o], a7    \n\t"            \
4755
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4756
        : [a] "r" (va), [b] "r" (vb)                     \
4757
        : "a5", "a6", "a7"                               \
4758
    )
4759
/* Square va and store double size result in: vh | vl
 *
 * Uses "mul" for the low word and "mulhu" for the high word with va as both
 * operands. No scratch registers are clobbered.
 * Note: vl and vh are bound as read-write ("+r") operands even though the
 * instructions only write them.
 */
4760
#define SP_ASM_SQR(vl, vh, va)                           \
4761
    __asm__ __volatile__ (                               \
4762
        "mul  %[l], %[a], %[a]  \n\t"            \
4763
        "mulhu  %[h], %[a], %[a]  \n\t"            \
4764
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4765
        : [a] "r" (va)                                   \
4766
        :                                                \
4767
    )
4768
/* Square va and add double size result into: vo | vh | vl
 *
 * a5 and a6 receive the low and high words of va * va. After each add,
 * "sltu" recovers the carry into a7, which is added to the next word up.
 * Clobbers a5, a6 and a7.
 */
4769
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4770
    __asm__ __volatile__ (                               \
4771
        "mul  a5, %[a], %[a]    \n\t"            \
4772
        "mulhu  a6, %[a], %[a]    \n\t"            \
4773
        "add  %[l], %[l], a5    \n\t"            \
4774
        "sltu a7, %[l], a5    \n\t"            \
4775
        "add  %[h], %[h], a7    \n\t"            \
4776
        "sltu a7, %[h], a7    \n\t"            \
4777
        "add  %[o], %[o], a7    \n\t"            \
4778
        "add  %[h], %[h], a6    \n\t"            \
4779
        "sltu a7, %[h], a6    \n\t"            \
4780
        "add  %[o], %[o], a7    \n\t"            \
4781
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4782
        : [a] "r" (va)                                   \
4783
        : "a5", "a6", "a7"                               \
4784
    )
4785
/* Square va and add double size result into: vh | vl
 *
 * The carry out of vl (recovered with "sltu" into a7) is added into vh; a
 * carry out of vh is not captured.
 * Clobbers a5, a6 and a7.
 */
4786
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4787
    __asm__ __volatile__ (                               \
4788
        "mul  a5, %[a], %[a]    \n\t"            \
4789
        "mulhu  a6, %[a], %[a]    \n\t"            \
4790
        "add  %[l], %[l], a5    \n\t"            \
4791
        "sltu a7, %[l], a5    \n\t"            \
4792
        "add  %[h], %[h], a6    \n\t"            \
4793
        "add  %[h], %[h], a7    \n\t"            \
4794
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4795
        : [a] "r" (va)                                   \
4796
        : "a5", "a6", "a7"                               \
4797
    )
4798
/* Add va into: vh | vl
 *
 * The carry out of the low word is recovered with "sltu" (sum < addend) into
 * a7 and then added to vh.
 * Clobbers a7.
 */
4799
#define SP_ASM_ADDC(vl, vh, va)                          \
4800
    __asm__ __volatile__ (                               \
4801
        "add  %[l], %[l], %[a]  \n\t"            \
4802
        "sltu a7, %[l], %[a]    \n\t"            \
4803
        "add  %[h], %[h], a7    \n\t"            \
4804
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4805
        : [a] "r" (va)                                   \
4806
        : "a7"                                           \
4807
    )
4808
/* Sub va from: vh | vl
 *
 * The original vl is first copied to a7 so that the borrow can be detected
 * after the subtraction (original < difference); that borrow is then
 * subtracted from vh.
 * Clobbers a7.
 */
4809
#define SP_ASM_SUBB(vl, vh, va)                          \
4810
    __asm__ __volatile__ (                               \
4811
        "add  a7, %[l], zero    \n\t"            \
4812
        "sub  %[l], a7, %[a]    \n\t"            \
4813
        "sltu a7, a7, %[l]    \n\t"            \
4814
        "sub  %[h], %[h], a7    \n\t"            \
4815
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4816
        : [a] "r" (va)                                   \
4817
        : "a7"                                           \
4818
    )
4819
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word add is performed twice in sequence. Carries out of vl and
 * vh are recovered with "sltu" into a7 and added to the next word up; no
 * carry out of vo is captured.
 * Clobbers a7.
 */
4820
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4821
    __asm__ __volatile__ (                               \
4822
        "add  %[l], %[l], %[a]  \n\t"            \
4823
        "sltu a7, %[l], %[a]    \n\t"            \
4824
        "add  %[h], %[h], a7    \n\t"            \
4825
        "sltu a7, %[h], a7    \n\t"            \
4826
        "add  %[o], %[o], a7    \n\t"            \
4827
        "add  %[h], %[h], %[b]  \n\t"            \
4828
        "sltu a7, %[h], %[b]    \n\t"            \
4829
        "add  %[o], %[o], %[c]  \n\t"            \
4830
        "add  %[o], %[o], a7    \n\t"            \
4831
        "add  %[l], %[l], %[a]  \n\t"            \
4832
        "sltu a7, %[l], %[a]    \n\t"            \
4833
        "add  %[h], %[h], a7    \n\t"            \
4834
        "sltu a7, %[h], a7    \n\t"            \
4835
        "add  %[o], %[o], a7    \n\t"            \
4836
        "add  %[h], %[h], %[b]  \n\t"            \
4837
        "sltu a7, %[h], %[b]    \n\t"            \
4838
        "add  %[o], %[o], %[c]  \n\t"            \
4839
        "add  %[o], %[o], a7    \n\t"            \
4840
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4841
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4842
        : "a7"                                           \
4843
    )
4844
4845
#define SP_INT_ASM_AVAILABLE
4846
4847
    #endif /* WOLFSSL_SP_RISCV32 && SP_WORD_SIZE == 32 */
4848
4849
    #if defined(WOLFSSL_SP_S390X) && SP_WORD_SIZE == 64
4850
/*
4851
 * CPU: IBM s390x
4852
 */
4853
4854
/* Multiply va by vb and store double size result in: vh | vl
 *
 * va is loaded into r1 and "mlgr" multiplies it by vb; the low word is then
 * copied out of r1 and the high word out of r0.
 * Clobbers r0 and r1.
 */
4855
#define SP_ASM_MUL(vl, vh, va, vb)                       \
4856
    __asm__ __volatile__ (                               \
4857
        "lgr  %%r1, %[a]    \n\t"            \
4858
        "mlgr %%r0, %[b]    \n\t"            \
4859
        "lgr  %[l], %%r1    \n\t"            \
4860
        "lgr  %[h], %%r0    \n\t"            \
4861
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4862
        : [a] "r" (va), [b] "r" (vb)                     \
4863
        : "r0", "r1"                                     \
4864
    )
4865
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The product is formed in the r0/r1 pair with "mlgr", vo is loaded with the
 * immediate 0, and the low (r1) and high (r0) words are copied out.
 * Clobbers r0 and r1.
 */
4866
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
4867
    __asm__ __volatile__ (                               \
4868
        "lgr  %%r1, %[a]    \n\t"            \
4869
        "mlgr %%r0, %[b]    \n\t"            \
4870
        "lghi %[o], 0     \n\t"            \
4871
        "lgr  %[l], %%r1    \n\t"            \
4872
        "lgr  %[h], %%r0    \n\t"            \
4873
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
4874
        : [a] "r" (va), [b] "r" (vb)                     \
4875
        : "r0", "r1"                                     \
4876
    )
4877
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * r10 is set to zero so the final add-with-carry only folds the carry into
 * vo. The product in r0 (high) / r1 (low) is added with "algr" followed by
 * "alcgr" (add with carry).
 * Clobbers r0, r1, r10 and the condition code.
 */
4878
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
4879
    __asm__ __volatile__ (                               \
4880
        "lghi %%r10, 0  \n\t"                    \
4881
        "lgr  %%r1, %[a]    \n\t"            \
4882
        "mlgr %%r0, %[b]    \n\t"            \
4883
        "algr %[l], %%r1  \n\t"                    \
4884
        "alcgr  %[h], %%r0  \n\t"                    \
4885
        "alcgr  %[o], %%r10 \n\t"                    \
4886
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4887
        : [a] "r" (va), [b] "r" (vb)                     \
4888
        : "r0", "r1", "r10", "cc"                        \
4889
    )
4890
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product in r0 (high) / r1 (low) is added with "algr" then "alcgr";
 * a carry out of vh is not captured.
 * Clobbers r0, r1 and the condition code.
 */
4891
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
4892
    __asm__ __volatile__ (                               \
4893
        "lgr  %%r1, %[a]    \n\t"            \
4894
        "mlgr %%r0, %[b]    \n\t"            \
4895
        "algr %[l], %%r1  \n\t"                    \
4896
        "alcgr  %[h], %%r0  \n\t"                    \
4897
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4898
        : [a] "r" (va), [b] "r" (vb)                     \
4899
        : "r0", "r1", "cc"                               \
4900
    )
4901
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into r0 (high) / r1 (low) and the three-word
 * "algr"/"alcgr"/"alcgr" add chain is executed two times; r10 holds zero for
 * the carry-only add into vo.
 * Clobbers r0, r1, r10 and the condition code.
 */
4902
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
4903
    __asm__ __volatile__ (                               \
4904
        "lghi %%r10, 0  \n\t"                    \
4905
        "lgr  %%r1, %[a]    \n\t"            \
4906
        "mlgr %%r0, %[b]    \n\t"            \
4907
        "algr %[l], %%r1  \n\t"                    \
4908
        "alcgr  %[h], %%r0  \n\t"                    \
4909
        "alcgr  %[o], %%r10 \n\t"                    \
4910
        "algr %[l], %%r1  \n\t"                    \
4911
        "alcgr  %[h], %%r0  \n\t"                    \
4912
        "alcgr  %[o], %%r10 \n\t"                    \
4913
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4914
        : [a] "r" (va), [b] "r" (vb)                     \
4915
        : "r0", "r1", "r10", "cc"                        \
4916
    )
4917
/* Multiply va by vb and add double size result twice into: vo | vh | vl
4918
 * Assumes first add will not overflow vh | vl
 *
 * The first add only touches vl and vh; only the second add folds its carry
 * into vo (through r10, which holds zero).
 * Clobbers r0, r1, r10 and the condition code.
4919
 */
4920
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4921
    __asm__ __volatile__ (                               \
4922
        "lghi %%r10, 0  \n\t"                    \
4923
        "lgr  %%r1, %[a]    \n\t"            \
4924
        "mlgr %%r0, %[b]    \n\t"            \
4925
        "algr %[l], %%r1  \n\t"                    \
4926
        "alcgr  %[h], %%r0  \n\t"                    \
4927
        "algr %[l], %%r1  \n\t"                    \
4928
        "alcgr  %[h], %%r0  \n\t"                    \
4929
        "alcgr  %[o], %%r10 \n\t"                    \
4930
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4931
        : [a] "r" (va), [b] "r" (vb)                     \
4932
        : "r0", "r1", "r10", "cc"                        \
4933
    )
4934
/* Square va and store double size result in: vh | vl
 *
 * va is loaded into r1 and "mlgr" multiplies it by r1 itself; the low word
 * is then copied out of r1 and the high word out of r0.
 * Clobbers r0 and r1.
 */
4935
#define SP_ASM_SQR(vl, vh, va)                           \
4936
    __asm__ __volatile__ (                               \
4937
        "lgr  %%r1, %[a]    \n\t"            \
4938
        "mlgr %%r0, %%r1    \n\t"            \
4939
        "lgr  %[l], %%r1    \n\t"            \
4940
        "lgr  %[h], %%r0    \n\t"            \
4941
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4942
        : [a] "r" (va)                                   \
4943
        : "r0", "r1"                                     \
4944
    )
4945
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is formed in r0 (high) / r1 (low) and added with "algr" then
 * "alcgr"; r10 holds zero so the last "alcgr" only folds the carry into vo.
 * Clobbers r0, r1, r10 and the condition code.
 */
4946
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4947
    __asm__ __volatile__ (                               \
4948
        "lghi %%r10, 0  \n\t"                    \
4949
        "lgr  %%r1, %[a]    \n\t"            \
4950
        "mlgr %%r0, %%r1    \n\t"            \
4951
        "algr %[l], %%r1  \n\t"                    \
4952
        "alcgr  %[h], %%r0  \n\t"                    \
4953
        "alcgr  %[o], %%r10 \n\t"                    \
4954
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4955
        : [a] "r" (va)                                   \
4956
        : "r0", "r1", "r10", "cc"                        \
4957
    )
4958
/* Square va and add double size result into: vh | vl
 *
 * The square in r0 (high) / r1 (low) is added with "algr" then "alcgr";
 * a carry out of vh is not captured.
 * Clobbers r0, r1 and the condition code.
 */
4959
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4960
    __asm__ __volatile__ (                               \
4961
        "lgr  %%r1, %[a]    \n\t"            \
4962
        "mlgr %%r0, %%r1    \n\t"            \
4963
        "algr %[l], %%r1  \n\t"                    \
4964
        "alcgr  %[h], %%r0  \n\t"                    \
4965
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4966
        : [a] "r" (va)                                   \
4967
        : "r0", "r1", "cc"                               \
4968
    )
4969
/* Add va into: vh | vl
 *
 * va is added to vl with "algr"; the carry is then folded into vh by an
 * add-with-carry of r10, which is first loaded with zero.
 * Clobbers r10 and the condition code.
 */
4970
#define SP_ASM_ADDC(vl, vh, va)                          \
4971
    __asm__ __volatile__ (                               \
4972
        "lghi %%r10, 0  \n\t"                    \
4973
        "algr %[l], %[a]  \n\t"                    \
4974
        "alcgr  %[h], %%r10 \n\t"                    \
4975
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4976
        : [a] "r" (va)                                   \
4977
        : "r10", "cc"                                    \
4978
    )
4979
/* Sub va from: vh | vl
 *
 * va is subtracted from vl with "slgr"; the borrow is then applied to vh by
 * a subtract-with-borrow of r10, which is first loaded with zero.
 * Clobbers r10 and the condition code.
 */
4980
#define SP_ASM_SUBB(vl, vh, va)                          \
4981
    __asm__ __volatile__ (                               \
4982
        "lghi %%r10, 0  \n\t"                    \
4983
        "slgr %[l], %[a]  \n\t"                    \
4984
        "slbgr  %[h], %%r10 \n\t"                    \
4985
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4986
        : [a] "r" (va)                                   \
4987
        : "r10", "cc"                                    \
4988
    )
4989
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word "algr"/"alcgr"/"alcgr" add chain is executed twice; no
 * carry out of vo is captured.
 * Clobbers only the condition code.
 */
4990
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4991
    __asm__ __volatile__ (                               \
4992
        "algr %[l], %[a]  \n\t"                    \
4993
        "alcgr  %[h], %[b]  \n\t"                    \
4994
        "alcgr  %[o], %[c]  \n\t"                    \
4995
        "algr %[l], %[a]  \n\t"                    \
4996
        "alcgr  %[h], %[b]  \n\t"                    \
4997
        "alcgr  %[o], %[c]  \n\t"                    \
4998
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4999
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
5000
        : "cc"                                           \
5001
    )
5002
5003
#define SP_INT_ASM_AVAILABLE
5004
5005
    #endif /* WOLFSSL_SP_S390X && SP_WORD_SIZE == 64 */
5006
5007
/* When a CPU specific set of SP_ASM_* macros has been defined above, enable
 * the assembly multiply/square code (unless SP_INT_NO_ASM is defined) and
 * fall back to the generic ADDC/SUBB macros for the *_REG variants when the
 * CPU section did not provide its own.
 */
#ifdef SP_INT_ASM_AVAILABLE
5008
    #ifndef SP_INT_NO_ASM
5009
        #define SQR_MUL_ASM
5010
    #endif
5011
    #ifndef SP_ASM_ADDC_REG
5012
        #define SP_ASM_ADDC_REG  SP_ASM_ADDC
5013
    #endif /* SP_ASM_ADDC_REG */
5014
    #ifndef SP_ASM_SUBB_REG
5015
        #define SP_ASM_SUBB_REG  SP_ASM_SUBB
5016
    #endif /* SP_ASM_SUBB_REG */
5017
#endif /* SP_INT_ASM_AVAILABLE */
5018
5019
#endif /* !WOLFSSL_NO_ASM */
5020
5021
5022
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
5023
    !defined(NO_DSA) || !defined(NO_DH) || \
5024
    (defined(HAVE_ECC) && defined(HAVE_COMP_KEY)) || defined(OPENSSL_EXTRA) || \
5025
    (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY))
5026
#ifndef WC_NO_CACHE_RESISTANT
5027
    /* Mask of address for constant time operations.
     *
     * Two entry lookup table: index 0 is all zero bits and index 1 is all one
     * bits ((size_t)-1). Only compiled when WC_NO_CACHE_RESISTANT is not
     * defined.
     */
5028
    const size_t sp_off_on_addr[2] =
5029
    {
5030
        (size_t) 0,
5031
        (size_t)-1
5032
    };
5033
#endif
5034
#endif
5035
5036
5037
#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
5038
5039
#ifdef __cplusplus
5040
extern "C" {
5041
#endif
5042
5043
/* Modular exponentiation implementations using Single Precision. */
5044
WOLFSSL_LOCAL int sp_ModExp_1024(sp_int* base, sp_int* exp, sp_int* mod,
5045
    sp_int* res);
5046
WOLFSSL_LOCAL int sp_ModExp_1536(sp_int* base, sp_int* exp, sp_int* mod,
5047
    sp_int* res);
5048
WOLFSSL_LOCAL int sp_ModExp_2048(sp_int* base, sp_int* exp, sp_int* mod,
5049
    sp_int* res);
5050
WOLFSSL_LOCAL int sp_ModExp_3072(sp_int* base, sp_int* exp, sp_int* mod,
5051
    sp_int* res);
5052
WOLFSSL_LOCAL int sp_ModExp_4096(sp_int* base, sp_int* exp, sp_int* mod,
5053
    sp_int* res);
5054
5055
#ifdef __cplusplus
5056
} /* extern "C" */
5057
#endif
5058
5059
#endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */
5060
5061
5062
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
5063
    defined(OPENSSL_ALL)
5064
static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct);
5065
#endif
5066
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
5067
    defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
5068
    defined(OPENSSL_ALL)
5069
static void _sp_mont_setup(const sp_int* m, sp_int_digit* rho);
5070
#endif
5071
5072
5073
/* Set the multi-precision number to zero.
5074
 *
5075
 * Assumes a is not NULL.
5076
 *
5077
 * @param  [out]  a  SP integer to set to zero.
5078
 */
5079
static void _sp_zero(sp_int* a)
5080
127M
{
5081
127M
    sp_int_minimal* am = (sp_int_minimal *)a;
5082
5083
127M
    am->used = 0;
5084
127M
    am->dp[0] = 0;
5085
127M
#ifdef WOLFSSL_SP_INT_NEGATIVE
5086
127M
    am->sign = MP_ZPOS;
5087
127M
#endif
5088
127M
}
5089
5090
5091
/* Initialize the multi-precision number to be zero with a given max size.
5092
 *
 * No parameter validation is performed here: a is dereferenced directly and
 * size is stored as given (cast to sp_size_t). Callers in this file check
 * their arguments before calling.
 *
5093
 * @param  [out]  a     SP integer.
5094
 * @param  [in]   size  Number of words to say are available.
5095
 */
5096
static void _sp_init_size(sp_int* a, unsigned int size)
5097
93.1M
{
5098
93.1M
    /* NOTE(review): the volatile qualifier is presumably here to keep the
     * compiler from eliding or reordering these stores - confirm. It is cast
     * away again for the call to _sp_zero() below. */
    volatile sp_int_minimal* am = (sp_int_minimal *)a;
5099
5100
#ifdef HAVE_WOLF_BIGINT
5101
    wc_bigint_init((struct WC_BIGINT*)&am->raw);
5102
#endif
5103
93.1M
    /* Clear the value (used, first digit and, when compiled in, sign). */
    _sp_zero((sp_int*)am);
5104
5105
93.1M
    /* Record the number of digits the caller says are available. */
    am->size = (sp_size_t)size;
5106
93.1M
}
5107
5108
/* Initialize the multi-precision number to be zero with a given max size.
5109
 *
5110
 * @param  [out]  a     SP integer.
5111
 * @param  [in]   size  Number of words to say are available.
5112
 *
5113
 * @return  MP_OKAY on success.
5114
 * @return  MP_VAL when a is NULL.
5115
 */
5116
int sp_init_size(sp_int* a, unsigned int size)
5117
29.2M
{
5118
29.2M
    int err = MP_OKAY;
5119
5120
    /* Validate parameters. Don't use size more than max compiled. */
5121
29.2M
    if ((a == NULL) || ((size == 0) || (size > SP_INT_DIGITS))) {
5122
92.0k
        err = MP_VAL;
5123
92.0k
    }
5124
5125
29.2M
    if (err == MP_OKAY) {
5126
29.1M
        _sp_init_size(a, size);
5127
29.1M
    }
5128
5129
29.2M
    return err;
5130
29.2M
}
5131
5132
/* Initialize the multi-precision number to be zero.
5133
 *
5134
 * @param  [out]  a  SP integer.
5135
 *
5136
 * @return  MP_OKAY on success.
5137
 * @return  MP_VAL when a is NULL.
5138
 */
5139
int sp_init(sp_int* a)
5140
731k
{
5141
731k
    int err = MP_OKAY;
5142
5143
    /* Validate parameter. */
5144
731k
    if (a == NULL) {
5145
0
        err = MP_VAL;
5146
0
    }
5147
731k
    else {
5148
        /* Assume complete sp_int with SP_INT_DIGITS digits. */
5149
731k
        _sp_init_size(a, SP_INT_DIGITS);
5150
731k
    }
5151
5152
731k
    return err;
5153
731k
}
5154
5155
#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
5156
/* Initialize up to six multi-precision numbers to be zero.
5157
 *
5158
 * @param  [out]  n1  SP integer.
5159
 * @param  [out]  n2  SP integer.
5160
 * @param  [out]  n3  SP integer.
5161
 * @param  [out]  n4  SP integer.
5162
 * @param  [out]  n5  SP integer.
5163
 * @param  [out]  n6  SP integer.
5164
 *
5165
 * @return  MP_OKAY on success.
5166
 */
5167
int sp_init_multi(sp_int* n1, sp_int* n2, sp_int* n3, sp_int* n4, sp_int* n5,
5168
    sp_int* n6)
5169
412k
{
5170
    /* Initialize only those pointers that are valid. */
5171
412k
    if (n1 != NULL) {
5172
411k
        _sp_init_size(n1, SP_INT_DIGITS);
5173
411k
    }
5174
412k
    if (n2 != NULL) {
5175
411k
        _sp_init_size(n2, SP_INT_DIGITS);
5176
411k
    }
5177
412k
    if (n3 != NULL) {
5178
364k
        _sp_init_size(n3, SP_INT_DIGITS);
5179
364k
    }
5180
412k
    if (n4 != NULL) {
5181
96.4k
        _sp_init_size(n4, SP_INT_DIGITS);
5182
96.4k
    }
5183
412k
    if (n5 != NULL) {
5184
24.5k
        _sp_init_size(n5, SP_INT_DIGITS);
5185
24.5k
    }
5186
412k
    if (n6 != NULL) {
5187
18.9k
        _sp_init_size(n6, SP_INT_DIGITS);
5188
18.9k
    }
5189
5190
412k
    return MP_OKAY;
5191
412k
}
5192
#endif /* !WOLFSSL_RSA_PUBLIC_ONLY || !NO_DH || HAVE_ECC */
5193
5194
/* Free the memory allocated in the multi-precision number.
5195
 *
5196
 * @param  [in]  a  SP integer.
5197
 */
5198
void sp_free(sp_int* a)
5199
30.8M
{
5200
30.8M
    if (a != NULL) {
5201
    #ifdef HAVE_WOLF_BIGINT
5202
        wc_bigint_free(&a->raw);
5203
    #endif
5204
30.8M
    }
5205
30.8M
}
5206
5207
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5208
    !defined(NO_DH) || defined(HAVE_ECC)
5209
/* Grow multi-precision number to be able to hold l digits.
5210
 * This function does nothing as the number of digits is fixed.
5211
 *
5212
 * @param  [in,out]  a  SP integer.
5213
 * @param  [in]      l  Number of digits to grow to.
5214
 *
5215
 * @return  MP_OKAY on success
5216
 * @return  MP_MEM if the number of digits requested is more than available.
5217
 */
5218
int sp_grow(sp_int* a, int l)
5219
104k
{
5220
104k
    int err = MP_OKAY;
5221
5222
    /* Validate parameter. */
5223
104k
    if ((a == NULL) || (l < 0)) {
5224
0
        err = MP_VAL;
5225
0
    }
5226
    /* Ensure enough words allocated for grow. */
5227
104k
    if ((err == MP_OKAY) && ((unsigned int)l > a->size)) {
5228
34
        err = MP_MEM;
5229
34
    }
5230
104k
    if (err == MP_OKAY) {
5231
104k
        unsigned int i;
5232
5233
        /* Put in zeros up to the new length. */
5234
390k
        for (i = a->used; i < (unsigned int)l; i++) {
5235
286k
            a->dp[i] = 0;
5236
286k
        }
5237
104k
    }
5238
5239
104k
    return err;
5240
104k
}
5241
#endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH || HAVE_ECC */
5242
5243
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5244
    defined(HAVE_ECC) || defined(WOLFSSL_PUBLIC_MP)
5245
/* Set the multi-precision number to zero.
5246
 *
5247
 * @param  [out]  a  SP integer to set to zero.
5248
 */
5249
void sp_zero(sp_int* a)
5250
1.15k
{
5251
    /* Make an sp_int with valid pointer zero. */
5252
1.15k
    if (a != NULL) {
5253
1.15k
        _sp_zero(a);
5254
1.15k
    }
5255
1.15k
}
5256
#endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || HAVE_ECC ||
        * WOLFSSL_PUBLIC_MP */
5257
5258
/* Clear the data from the multi-precision number, set to zero and free.
5259
 *
5260
 * @param  [out]  a  SP integer.
5261
 */
5262
void sp_clear(sp_int* a)
5263
30.6M
{
5264
#ifdef HAVE_FIPS
5265
    sp_forcezero(a);
5266
#else
5267
    /* Clear when valid pointer passed in. */
5268
30.6M
    if (a != NULL) {
5269
30.6M
        unsigned int i;
5270
5271
        /* Only clear the digits being used. */
5272
263M
        for (i = 0; i < a->used; i++) {
5273
232M
            a->dp[i] = 0;
5274
232M
        }
5275
        /* Set back to zero and free. */
5276
30.6M
        _sp_zero(a);
5277
30.6M
        sp_free(a);
5278
30.6M
    }
5279
30.6M
#endif
5280
30.6M
}
5281
5282
#if !defined(NO_RSA) || !defined(NO_DH) || defined(HAVE_ECC) || \
5283
    !defined(NO_DSA) || defined(WOLFSSL_SP_PRIME_GEN)
5284
/* Ensure the data in the multi-precision number is zeroed.
5285
 *
5286
 * Use when security sensitive data needs to be wiped.
5287
 *
5288
 * @param  [in]  a  SP integer.
5289
 */
5290
void sp_forcezero(sp_int* a)
5291
183k
{
5292
    /* Zeroize when a vald pointer passed in. */
5293
183k
    if (a != NULL) {
5294
        /* Ensure all data zeroized - data not zeroed when used decreases. */
5295
183k
        ForceZero(a->dp, a->size * (word32)SP_WORD_SIZEOF);
5296
        /* Set back to zero. */
5297
    #ifdef HAVE_WOLF_BIGINT
5298
        /* Zeroize the raw data as well. */
5299
        wc_bigint_zero(&a->raw);
5300
    #endif
5301
        /* Make value zero and free. */
5302
183k
        _sp_zero(a);
5303
183k
        sp_free(a);
5304
183k
    }
5305
183k
}
5306
#endif /* !NO_RSA || !NO_DH || HAVE_ECC || !NO_DSA || WOLFSSL_SP_PRIME_GEN */
5307
5308
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
5309
    !defined(NO_RSA) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
5310
/* Copy value of multi-precision number a into r.
5311
 *
5312
 * @param  [in]   a  SP integer - source.
5313
 * @param  [out]  r  SP integer - destination.
5314
 */
5315
static void _sp_copy(const sp_int* a, sp_int* r)
5316
371M
{
5317
    /* Copy words across. */
5318
371M
    if (a->used == 0) {
5319
1.90M
        r->dp[0] = 0;
5320
1.90M
    }
5321
369M
    else {
5322
369M
        XMEMCPY(r->dp, a->dp, a->used * (word32)SP_WORD_SIZEOF);
5323
369M
    }
5324
    /* Set number of used words in result. */
5325
371M
    r->used = a->used;/* // NOLINT(clang-analyzer-core.uninitialized.Assign) */
5326
371M
#ifdef WOLFSSL_SP_INT_NEGATIVE
5327
    /* Set sign of result. */
5328
371M
    r->sign = a->sign;/* // NOLINT(clang-analyzer-core.uninitialized.Assign) */
5329
371M
#endif
5330
371M
}
5331
5332
/* Copy value of multi-precision number a into r.
5333
 *
5334
 * @param  [in]   a  SP integer - source.
5335
 * @param  [out]  r  SP integer - destination.
5336
 *
5337
 * @return  MP_OKAY on success.
5338
 */
5339
int sp_copy(const sp_int* a, sp_int* r)
5340
45.4M
{
5341
45.4M
    int err = MP_OKAY;
5342
5343
    /* Validate parameters. */
5344
45.4M
    if ((a == NULL) || (r == NULL)) {
5345
0
        err = MP_VAL;
5346
0
    }
5347
    /* Only copy if different pointers. */
5348
45.4M
    if (a != r) {
5349
        /* Validated space in result. */
5350
2.48M
        if ((err == MP_OKAY) && (a->used > r->size)) {
5351
1.47k
            err = MP_VAL;
5352
1.47k
        }
5353
2.48M
        if (err == MP_OKAY) {
5354
2.47M
            _sp_copy(a, r);
5355
2.47M
        }
5356
2.48M
    }
5357
5358
45.4M
    return err;
5359
45.4M
}
5360
#endif
5361
5362
#if ((defined(WOLFSSL_SP_MATH_ALL) && ((!defined(WOLFSSL_RSA_VERIFY_ONLY) && \
5363
      !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH))) || \
5364
     defined(OPENSSL_ALL)) && defined(WC_PROTECT_ENCRYPTED_MEM)
5365
5366
/* Copy 2 numbers into two results based on y. Copy a fixed number of digits.
 *
 * Constant time implementation: for every digit both a1 and a2 are read and
 * both r1 and r2 are written. The value kept is selected by masking with
 * wc_off_on_addr[y] and wc_off_on_addr[y^1] rather than by branching on y.
 * NOTE(review): presumably wc_off_on_addr[0] is all zeros and
 * wc_off_on_addr[1] is all ones - confirm against its definition.
 *
 * When y is 0, r1 = a2 and r2 = a1.
 * When y is 1, r1 = a1 and r2 = a2.
 *
 * The used count (and sign when WOLFSSL_SP_INT_NEGATIVE is defined) is
 * selected in the same way. No parameter validation is performed.
 *
 * @param [in]  a1    First number to copy.
 * @param [in]  a2    Second number to copy.
 * @param [out] r1    First result number to copy into.
 * @param [out] r2    Second result number to copy into.
 * @param [in]  y     Indicates which number goes into which result number.
 *                    Used as an array index, and y^1 as the other index, so
 *                    must be 0 or 1.
 * @param [in]  used  Number of digits to copy. All four numbers are accessed
 *                    up to this many digits regardless of their used counts.
 */
5379
static void _sp_copy_2_ct(const sp_int* a1, const sp_int* a2, sp_int* r1,
5380
    sp_int* r2, int y, unsigned int used)
5381
{
5382
    unsigned int i;
5383
5384
    /* Copy data - constant time. One masked term of each sum is expected to
     * be zero, so the addition acts as a select. */
5385
    for (i = 0; i < used; i++) {
5386
        r1->dp[i] = (a1->dp[i] & ((sp_int_digit)wc_off_on_addr[y  ])) +
5387
                    (a2->dp[i] & ((sp_int_digit)wc_off_on_addr[y^1]));
5388
        r2->dp[i] = (a1->dp[i] & ((sp_int_digit)wc_off_on_addr[y^1])) +
5389
                    (a2->dp[i] & ((sp_int_digit)wc_off_on_addr[y  ]));
5390
    }
5391
    /* Copy used - selected with the same masks as the digits. */
5392
    r1->used = (a1->used & ((int)wc_off_on_addr[y  ])) +
5393
               (a2->used & ((int)wc_off_on_addr[y^1]));
5394
    r2->used = (a1->used & ((int)wc_off_on_addr[y^1])) +
5395
               (a2->used & ((int)wc_off_on_addr[y  ]));
5396
#ifdef WOLFSSL_SP_INT_NEGATIVE
5397
    /* Copy sign - selected with the same masks as the digits. */
5398
    r1->sign = (a1->sign & ((int)wc_off_on_addr[y  ])) +
5399
               (a2->sign & ((int)wc_off_on_addr[y^1]));
5400
    r2->sign = (a1->sign & ((int)wc_off_on_addr[y^1])) +
5401
               (a2->sign & ((int)wc_off_on_addr[y  ]));
5402
#endif
5403
}
5404
5405
#endif
5406
5407
#if defined(WOLFSSL_SP_MATH_ALL) || (defined(HAVE_ECC) && defined(FP_ECC))
5408
/* Initializes r and copies in value from a.
5409
 *
5410
 * @param  [out]  r  SP integer - destination.
5411
 * @param  [in]   a  SP integer - source.
5412
 *
5413
 * @return  MP_OKAY on success.
5414
 * @return  MP_VAL when a or r is NULL.
5415
 */
5416
int sp_init_copy(sp_int* r, const sp_int* a)
5417
0
{
5418
0
    int err;
5419
5420
    /* Initialize r and copy value in a into it. */
5421
0
    err = sp_init(r);
5422
0
    if (err == MP_OKAY) {
5423
0
        err = sp_copy(a, r);
5424
0
    }
5425
5426
0
    return err;
5427
0
}
5428
#endif /* WOLFSSL_SP_MATH_ALL || (HAVE_ECC && FP_ECC) */
5429
5430
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5431
    !defined(NO_DH) || !defined(NO_DSA)
5432
/* Exchange the values in a and b.
5433
 *
5434
 * Avoid using this API as three copy operations are performed.
5435
 *
5436
 * @param  [in,out]  a  SP integer to swap.
5437
 * @param  [in,out]  b  SP integer to swap.
5438
 *
5439
 * @return  MP_OKAY on success.
5440
 * @return  MP_VAL when a or b is NULL.
5441
 * @return  MP_MEM when dynamic memory allocation fails.
5442
 */
5443
int sp_exch(sp_int* a, sp_int* b)
5444
167
{
5445
167
    int err = MP_OKAY;
5446
5447
    /* Validate parameters. */
5448
167
    if ((a == NULL) || (b == NULL)) {
5449
0
        err = MP_VAL;
5450
0
    }
5451
    /* Check space for a in b and b in a. */
5452
167
    if ((err == MP_OKAY) && ((a->size < b->used) || (b->size < a->used))) {
5453
23
        err = MP_VAL;
5454
23
    }
5455
5456
167
    if (err == MP_OKAY) {
5457
        /* Declare temporary for swapping. */
5458
144
        DECL_SP_INT(t, a->used);
5459
5460
        /* Create temporary for swapping. */
5461
144
        ALLOC_SP_INT(t, a->used, err, NULL);
5462
144
        if (err == MP_OKAY) {
5463
            /* Cache allocated size of a and b. */
5464
103
            sp_size_t asize = a->size;
5465
103
            sp_size_t bsize = b->size;
5466
            /* Copy all of SP int: t <- a, a <- b, b <- t. */
5467
103
            XMEMCPY(t, a, MP_INT_SIZEOF(a->used));
5468
103
            XMEMCPY(a, b, MP_INT_SIZEOF(b->used));
5469
103
            XMEMCPY(b, t, MP_INT_SIZEOF(t->used));
5470
            /* Put back size of a and b. */
5471
103
            a->size = asize;
5472
103
            b->size = bsize;
5473
103
        }
5474
5475
144
        FREE_SP_INT(t, NULL);
5476
144
    }
5477
5478
167
    return err;
5479
167
}
5480
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
5481
        * !NO_DSA */
5482
5483
#if defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT) && \
5484
    !defined(WC_NO_CACHE_RESISTANT)
5485
/* Conditional swap of SP int values in constant time.
5486
 *
5487
 * @param [in]  a     First SP int to conditionally swap.
5488
 * @param [in]  b     Second SP int to conditionally swap.
5489
 * @param [in]  cnt   Count of words to copy.
5490
 * @param [in]  swap  When value is 1 then swap.
5491
 * @param [in]  t     Temporary SP int to use in swap.
5492
 * @return  MP_OKAY on success.
5493
 * @return  MP_MEM when dynamic memory allocation fails.
5494
 */
5495
int sp_cond_swap_ct_ex(sp_int* a, sp_int* b, int cnt, int swap, sp_int* t)
5496
19.9M
{
5497
19.9M
    unsigned int i;
5498
19.9M
    volatile sp_int_digit mask = (sp_int_digit)0 - (sp_int_digit)swap;
5499
5500
    /* XOR other fields in sp_int into temp - mask set when swapping. */
5501
19.9M
    t->used = (a->used ^ b->used) & (sp_size_t)mask;
5502
19.9M
#ifdef WOLFSSL_SP_INT_NEGATIVE
5503
19.9M
    t->sign = (a->sign ^ b->sign) & (sp_uint8)mask;
5504
19.9M
#endif
5505
5506
    /* XOR requested words into temp - mask set when swapping. */
5507
183M
    for (i = 0; i < (unsigned int)cnt; i++) {
5508
163M
        t->dp[i] = (a->dp[i] ^ b->dp[i]) & mask;
5509
163M
    }
5510
5511
    /* XOR temporary - when mask set then result will be b. */
5512
19.9M
    a->used ^= t->used;
5513
19.9M
#ifdef WOLFSSL_SP_INT_NEGATIVE
5514
19.9M
    a->sign ^= t->sign;
5515
19.9M
#endif
5516
183M
    for (i = 0; i < (unsigned int)cnt; i++) {
5517
163M
        a->dp[i] ^= t->dp[i];
5518
163M
    }
5519
5520
    /* XOR temporary - when mask set then result will be a. */
5521
19.9M
    b->used ^= t->used;
5522
19.9M
#ifdef WOLFSSL_SP_INT_NEGATIVE
5523
19.9M
    b->sign ^= b->sign;
5524
19.9M
#endif
5525
183M
    for (i = 0; i < (unsigned int)cnt; i++) {
5526
163M
        b->dp[i] ^= t->dp[i];
5527
163M
    }
5528
5529
19.9M
    return MP_OKAY;
5530
19.9M
}
5531
5532
/* Conditional swap of SP int values in constant time.
5533
 *
5534
 * @param [in]  a     First SP int to conditionally swap.
5535
 * @param [in]  b     Second SP int to conditionally swap.
5536
 * @param [in]  cnt   Count of words to copy.
5537
 * @param [in]  swap  When value is 1 then swap.
5538
 * @return  MP_OKAY on success.
5539
 * @return  MP_MEM when dynamic memory allocation fails.
5540
 */
5541
int sp_cond_swap_ct(sp_int* a, sp_int* b, int cnt, int swap)
5542
0
{
5543
0
    int err = MP_OKAY;
5544
0
    DECL_SP_INT(t, (size_t)cnt);
5545
5546
    /* Allocate temporary to hold masked xor of a and b. */
5547
0
    ALLOC_SP_INT(t, cnt, err, NULL);
5548
5549
0
    if (err == MP_OKAY) {
5550
0
        err = sp_cond_swap_ct_ex(a, b, cnt, swap, t);
5551
0
        FREE_SP_INT(t, NULL);
5552
0
    }
5553
5554
0
    return err;
5555
0
}
5556
#endif /* HAVE_ECC && ECC_TIMING_RESISTANT && !WC_NO_CACHE_RESISTANT */
5557
5558
#ifdef WOLFSSL_SP_INT_NEGATIVE
5559
/* Calculate the absolute value of the multi-precision number.
5560
 *
5561
 * @param  [in]   a  SP integer to calculate absolute value of.
5562
 * @param  [out]  r  SP integer to hold result.
5563
 *
5564
 * @return  MP_OKAY on success.
5565
 * @return  MP_VAL when a or r is NULL.
5566
 */
5567
int sp_abs(const sp_int* a, sp_int* r)
5568
67
{
5569
67
    int err;
5570
5571
    /* Copy a into r - copy fails when r is NULL. */
5572
67
    err = sp_copy(a, r);
5573
67
    if (err == MP_OKAY) {
5574
53
        r->sign = MP_ZPOS;
5575
53
    }
5576
5577
67
    return err;
5578
67
}
5579
#endif /* WOLFSSL_SP_INT_NEGATIVE */
5580
5581
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
5582
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
5583
/* Compare absolute value of two multi-precision numbers.
5584
 *
5585
 * @param [in] a  SP integer.
5586
 * @param [in] b  SP integer.
5587
 *
5588
 * @return  MP_GT when a is greater than b.
5589
 * @return  MP_LT when a is less than b.
5590
 * @return  MP_EQ when a is equals b.
5591
 */
5592
static int _sp_cmp_abs(const sp_int* a, const sp_int* b)
5593
374M
{
5594
374M
    int ret = MP_EQ;
5595
5596
    /* Check number of words first. */
5597
374M
    if (a->used > b->used) {
5598
50.6M
        ret = MP_GT;
5599
50.6M
    }
5600
323M
    else if (a->used < b->used) {
5601
11.2M
        ret = MP_LT;
5602
11.2M
    }
5603
312M
    else {
5604
312M
        int i;
5605
5606
        /* Starting from most significant word, compare words.
5607
         * Stop when different and set comparison return.
5608
         */
5609
371M
        for (i = (int)(a->used - 1); i >= 0; i--) {
5610
368M
            if (a->dp[i] > b->dp[i]) {
5611
73.9M
                ret = MP_GT;
5612
73.9M
                break;
5613
73.9M
            }
5614
294M
            else if (a->dp[i] < b->dp[i]) {
5615
234M
                ret = MP_LT;
5616
234M
                break;
5617
234M
            }
5618
368M
        }
5619
        /* If we made to the end then ret is MP_EQ from initialization. */
5620
312M
    }
5621
5622
374M
    return ret;
5623
374M
}
5624
#endif
5625
5626
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
5627
/* Compare absolute value of two multi-precision numbers.
5628
 *
5629
 * Pointers are compared such that NULL is less than not NULL.
5630
 *
5631
 * @param [in] a  SP integer.
5632
 * @param [in] b  SP integer.
5633
 *
5634
 * @return  MP_GT when a is greater than b.
5635
 * @return  MP_LT when a is less than b.
5636
 * @return  MP_EQ when a equals b.
5637
 */
5638
int sp_cmp_mag(const sp_int* a, const sp_int* b)
5639
0
{
5640
0
    int ret;
5641
5642
    /* Do pointer checks first. Both NULL returns equal. */
5643
0
    if (a == b) {
5644
0
        ret = MP_EQ;
5645
0
    }
5646
    /* Nothing is smaller than something. */
5647
0
    else if (a == NULL) {
5648
0
        ret = MP_LT;
5649
0
    }
5650
    /* Something is larger than nothing. */
5651
0
    else if (b == NULL) {
5652
0
        ret = MP_GT;
5653
0
    }
5654
0
    else
5655
0
    {
5656
        /* Compare values - a and b are not NULL. */
5657
0
        ret = _sp_cmp_abs(a, b);
5658
0
    }
5659
5660
0
    return ret;
5661
0
}
5662
#endif
5663
5664
#if defined(WOLFSSL_SP_MATH_ALL) || defined(HAVE_ECC) || !defined(NO_DSA) || \
5665
    defined(OPENSSL_EXTRA) || !defined(NO_DH) || \
5666
    (!defined(NO_RSA) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
5667
     defined(WOLFSSL_KEY_GEN)))
5668
/* Compare two multi-precision numbers.
5669
 *
5670
 * Assumes a and b are not NULL.
5671
 *
5672
 * @param [in] a  SP integer.
5673
 * @param [in] b  SP integer.
5674
 *
5675
 * @return  MP_GT when a is greater than b.
5676
 * @return  MP_LT when a is less than b.
5677
 * @return  MP_EQ when a is equals b.
5678
 */
5679
static int _sp_cmp(const sp_int* a, const sp_int* b)
5680
10.9M
{
5681
10.9M
    int ret;
5682
5683
10.9M
#ifdef WOLFSSL_SP_INT_NEGATIVE
5684
    /* Check sign first. */
5685
10.9M
    if (a->sign > b->sign) {
5686
1.22k
        ret = MP_LT;
5687
1.22k
    }
5688
10.9M
    else if (a->sign < b->sign) {
5689
511
        ret = MP_GT;
5690
511
    }
5691
10.9M
    else /* (a->sign == b->sign) */ {
5692
10.9M
#endif
5693
        /* Compare values. */
5694
10.9M
        ret = _sp_cmp_abs(a, b);
5695
10.9M
#ifdef WOLFSSL_SP_INT_NEGATIVE
5696
10.9M
        if (a->sign == MP_NEG) {
5697
            /* MP_GT = 1, MP_LT = -1, MP_EQ = 0
5698
             * Swapping MP_GT and MP_LT results.
5699
             */
5700
661
            ret = -ret;
5701
661
        }
5702
10.9M
    }
5703
10.9M
#endif
5704
5705
10.9M
    return ret;
5706
10.9M
}
5707
#endif
5708
5709
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5710
    !defined(NO_DSA) || defined(HAVE_ECC) || !defined(NO_DH) || \
5711
    defined(WOLFSSL_SP_MATH_ALL)
5712
/* Compare two multi-precision numbers.
5713
 *
5714
 * Pointers are compared such that NULL is less than not NULL.
5715
 *
5716
 * @param [in] a  SP integer.
5717
 * @param [in] b  SP integer.
5718
 *
5719
 * @return  MP_GT when a is greater than b.
5720
 * @return  MP_LT when a is less than b.
5721
 * @return  MP_EQ when a is equals b.
5722
 */
5723
int sp_cmp(const sp_int* a, const sp_int* b)
5724
13.7M
{
5725
13.7M
    int ret;
5726
5727
    /* Check pointers first. Both NULL returns equal. */
5728
13.7M
    if (a == b) {
5729
2.04k
        ret = MP_EQ;
5730
2.04k
    }
5731
    /* Nothing is smaller than something. */
5732
13.7M
    else if (a == NULL) {
5733
0
        ret = MP_LT;
5734
0
    }
5735
    /* Something is larger than nothing. */
5736
13.7M
    else if (b == NULL) {
5737
0
        ret = MP_GT;
5738
0
    }
5739
13.7M
    else
5740
13.7M
    {
5741
        /* Compare values - a and b are not NULL. */
5742
13.7M
        ret = _sp_cmp(a, b);
5743
13.7M
    }
5744
5745
13.7M
    return ret;
5746
13.7M
}
5747
#endif
5748
5749
#if defined(HAVE_ECC) && !defined(WC_NO_RNG) && \
5750
    defined(WOLFSSL_ECC_GEN_REJECT_SAMPLING)
5751
/* Compare two multi-precision numbers in constant time.
 *
 * Assumes a and b are not NULL.
 * Assumes a and b are positive.
 *
 * All n digits of both numbers are always read, from most significant to
 * least significant, with no early exit. Digits at or above a number's used
 * count are masked to zero before being compared, so both numbers need n
 * digits that can be read.
 *
 * @param [in] a  SP integer.
 * @param [in] b  SP integer.
 * @param [in] n  Number of digits to compare.
 *
 * @return  MP_GT when a is greater than b.
 * @return  MP_LT when a is less than b.
 * @return  MP_EQ when a is equals b.
 */
5764
static int _sp_cmp_ct(const sp_int* a, const sp_int* b, unsigned int n)
5765
{
5766
    int ret = MP_EQ;
5767
    int i;
    /* All ones while no difference has been found, zero afterwards. */
5768
    volatile int mask = -1;
5769
5770
    for (i = n - 1; i >= 0; i--) {
        /* Treat digits not in use as zero without branching on used. */
5771
        sp_int_digit ad = a->dp[i] & ((sp_int_digit)0 - (i < (int)a->used));
5772
        sp_int_digit bd = b->dp[i] & ((sp_int_digit)0 - (i < (int)b->used));
5773
        /* Record less-than for this digit only while mask is still set. */
5774
        ret |= mask & ((0 - (ad < bd)) & MP_LT);
        /* Clear mask once ret is no longer MP_EQ so that less significant
         * digits cannot change the result. */
5775
        mask &= 0 - (ret == MP_EQ);
        /* Record greater-than for this digit only while mask is still set. */
5776
        ret |= mask & ((0 - (ad > bd)) & MP_GT);
5777
        mask &= 0 - (ret == MP_EQ);
5778
    }
5779
5780
    return ret;
5781
}
5782
5783
/* Compare two multi-precision numbers in constant time.
5784
 *
5785
 * Pointers are compared such that NULL is less than not NULL.
5786
 * Assumes a and b are positive.
5787
 * Assumes a and b have n digits set at sometime.
5788
 *
5789
 * @param [in] a  SP integer.
5790
 * @param [in] b  SP integer.
5791
 * @param [in] n  Number of digits to compare.
5792
 *
5793
 * @return  MP_GT when a is greater than b.
5794
 * @return  MP_LT when a is less than b.
5795
 * @return  MP_EQ when a is equals b.
5796
 */
5797
int sp_cmp_ct(const sp_int* a, const sp_int* b, unsigned int n)
5798
{
5799
    int ret;
5800
5801
    /* Check pointers first. Both NULL returns equal. */
5802
    if (a == b) {
5803
        ret = MP_EQ;
5804
    }
5805
    /* Nothing is smaller than something. */
5806
    else if (a == NULL) {
5807
        ret = MP_LT;
5808
    }
5809
    /* Something is larger than nothing. */
5810
    else if (b == NULL) {
5811
        ret = MP_GT;
5812
    }
5813
    else
5814
    {
5815
        /* Compare values - a and b are not NULL. */
5816
        ret = _sp_cmp_ct(a, b, n);
5817
    }
5818
5819
    return ret;
5820
}
5821
#endif /* HAVE_ECC && !WC_NO_RNG && WOLFSSL_ECC_GEN_REJECT_SAMPLING */
5822
5823
/*************************
5824
 * Bit check/set functions
5825
 *************************/
5826
5827
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5828
    ((defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_SP_SM2)) && \
5829
     defined(HAVE_ECC)) || defined(OPENSSL_EXTRA) || defined(WOLFSSL_PUBLIC_MP)
5830
/* Check if a bit is set
5831
 *
5832
 * When a is NULL, result is 0.
5833
 *
5834
 * @param  [in]  a  SP integer.
5835
 * @param  [in]  b  Bit position to check.
5836
 *
5837
 * @return  0 when bit is not set.
5838
 * @return  1 when bit is set.
5839
 */
5840
int sp_is_bit_set(const sp_int* a, unsigned int b)
5841
4.71M
{
5842
4.71M
    int ret = 0;
5843
    /* Index of word. */
5844
4.71M
    unsigned int i = b >> SP_WORD_SHIFT;
5845
5846
    /* Check parameters. */
5847
4.71M
    if ((a != NULL) && (i < a->used)) {
5848
        /* Shift amount to get bit down to index 0. */
5849
4.71M
        unsigned int s = b & SP_WORD_MASK;
5850
5851
        /* Get and mask bit. */
5852
4.71M
        ret = (int)((a->dp[i] >> s) & (sp_int_digit)1);
5853
4.71M
    }
5854
5855
4.71M
    return ret;
5856
4.71M
}
5857
#endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) ||
5858
        * (WOLFSSL_SP_MATH_ALL && HAVE_ECC) */
5859
5860
/* Count the number of bits in the multi-precision number.
5861
 *
5862
 * When a is NULL, result is 0.
5863
 *
5864
 * @param  [in]  a  SP integer.
5865
 *
5866
 * @return  Number of bits in the SP integer value.
5867
 */
5868
int sp_count_bits(const sp_int* a)
5869
261M
{
5870
261M
    int n = -1;
5871
5872
    /* Check parameter. */
5873
261M
    if ((a != NULL) && (a->used > 0)) {
5874
        /* Get index of last word. */
5875
261M
        n = (int)(a->used - 1);
5876
        /* Don't count leading zeros. */
5877
261M
        while ((n >= 0) && (a->dp[n] == 0)) {
5878
2.65k
            n--;
5879
2.65k
        }
5880
261M
    }
5881
5882
    /* -1 indicates SP integer value was zero. */
5883
261M
    if (n < 0) {
5884
18.0k
        n = 0;
5885
18.0k
    }
5886
261M
    else {
5887
        /* Get the most significant word. */
5888
261M
        sp_int_digit d = a->dp[n];
5889
        /* Count of bits up to last word. */
5890
261M
        n *= SP_WORD_SIZE;
5891
5892
    #ifdef SP_ASM_HI_BIT_SET_IDX
5893
        {
5894
            sp_int_digit hi;
5895
            /* Get index of highest set bit. */
5896
            SP_ASM_HI_BIT_SET_IDX(d, hi);
5897
            /* Add bits up to and including index. */
5898
            n += (int)hi + 1;
5899
        }
5900
    #elif defined(SP_ASM_LZCNT)
5901
        {
5902
            sp_int_digit lz;
5903
            /* Count number of leading zeros in highest non-zero digit. */
5904
            SP_ASM_LZCNT(d, lz);
5905
            /* Add non-leading zero bits count. */
5906
            n += SP_WORD_SIZE - (int)lz;
5907
        }
5908
    #else
5909
        /* Check if top word has more than half the bits set. */
5910
261M
        if (d > SP_HALF_MAX) {
5911
            /* Set count to a full last word. */
5912
149M
            n += SP_WORD_SIZE;
5913
            /* Don't count leading zero bits. */
5914
484M
            while ((d & ((sp_int_digit)1 << (SP_WORD_SIZE - 1))) == 0) {
5915
334M
                n--;
5916
334M
                d <<= 1;
5917
334M
            }
5918
149M
        }
5919
112M
        else {
5920
            /* Add to count until highest set bit is shifted out. */
5921
2.93G
            while (d != 0) {
5922
2.82G
                n++;
5923
2.82G
                d >>= 1;
5924
2.82G
            }
5925
112M
        }
5926
261M
    #endif
5927
261M
    }
5928
5929
261M
    return n;
5930
261M
}
5931
5932
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
5933
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
5934
    (defined(HAVE_ECC) && defined(FP_ECC)) || \
5935
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
5936
5937
/* Number of entries in array of number of least significant zero bits. */
5938
#define SP_LNZ_CNT      16
5939
/* Number of bits the array checks. */
5940
82.5k
#define SP_LNZ_BITS     4
5941
/* Mask to apply to check with array. */
5942
412k
#define SP_LNZ_MASK     0xf
5943
/* Number of least significant zero bits in first SP_LNZ_CNT numbers. */
5944
static const int sp_lnz[SP_LNZ_CNT] = {
5945
   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
5946
};
5947
5948
/* Count the number of least significant zero bits.
5949
 *
5950
 * When a is not NULL, result is 0.
5951
 *
5952
 * @param  [in]   a  SP integer to use.
5953
 *
5954
 * @return  Number of least significant zero bits.
5955
 */
5956
#if !defined(HAVE_ECC) || !defined(HAVE_COMP_KEY)
5957
static
5958
#endif /* !HAVE_ECC || HAVE_COMP_KEY */
5959
int sp_cnt_lsb(const sp_int* a)
5960
329k
{
5961
329k
    unsigned int bc = 0;
5962
5963
    /* Check for number with a value. */
5964
329k
    if ((a != NULL) && (!sp_iszero(a))) {
5965
329k
        unsigned int i;
5966
329k
        unsigned int j;
5967
5968
        /* Count least significant words that are zero. */
5969
338k
        for (i = 0; (i < a->used) && (a->dp[i] == 0); i++, bc += SP_WORD_SIZE) {
5970
8.71k
        }
5971
5972
        /* Use 4-bit table to get count. */
5973
412k
        for (j = 0; j < SP_WORD_SIZE; j += SP_LNZ_BITS) {
5974
            /* Get number of lesat significant 0 bits in nibble. */
5975
412k
            int cnt = sp_lnz[(a->dp[i] >> j) & SP_LNZ_MASK];
5976
            /* Done if not all 4 bits are zero. */
5977
412k
            if (cnt != 4) {
5978
                /* Add checked bits and count in last 4 bits checked. */
5979
329k
                bc += j + (unsigned int)cnt;
5980
329k
                break;
5981
329k
            }
5982
412k
        }
5983
329k
    }
5984
5985
329k
    return (int)bc;
5986
329k
}
5987
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || (HAVE_ECC && FP_ECC) */
5988
5989
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_ASN_TEMPLATE) || \
5990
    (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_ASN))
5991
/* Determine if the most significant byte of the encoded multi-precision number
5992
 * has the top bit set.
5993
 *
5994
 * When a is NULL, result is 0.
5995
 *
5996
 * @param  [in]  a  SP integer.
5997
 *
5998
 * @return  1 when the top bit of top byte is set.
5999
 * @return  0 when the top bit of top byte is not set.
6000
 */
6001
int sp_leading_bit(const sp_int* a)
6002
4.50k
{
6003
4.50k
    int bit = 0;
6004
6005
    /* Check if we have a number and value to use. */
6006
4.50k
    if ((a != NULL) && (a->used > 0)) {
6007
        /* Get top word. */
6008
4.49k
        sp_int_digit d = a->dp[a->used - 1];
6009
6010
    #if SP_WORD_SIZE > 8
6011
        /* Remove bottom 8 bits until highest 8 bits left. */
6012
        while (d > (sp_int_digit)0xff) {
6013
            d >>= 8;
6014
        }
6015
    #endif
6016
        /* Get the highest bit of the 8-bit value. */
6017
4.49k
        bit = (int)(d >> 7);
6018
4.49k
    }
6019
6020
4.50k
    return bit;
6021
4.50k
}
6022
#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
6023
6024
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
6025
    defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || \
6026
    !defined(NO_RSA)
6027
/* Set one bit of a: a |= 1 << i
6028
 * The field 'used' is updated in a.
6029
 *
6030
 * @param  [in,out]  a  SP integer to set bit into.
6031
 * @param  [in]      i  Index of bit to set.
6032
 *
6033
 * @return  MP_OKAY on success.
6034
 * @return  MP_VAL when a is NULL, index is negative or index is too large.
6035
 */
6036
int sp_set_bit(sp_int* a, int i)
6037
848k
{
6038
848k
    int err = MP_OKAY;
6039
    /* Get index of word to set. */
6040
848k
    sp_size_t w = (sp_size_t)(i >> SP_WORD_SHIFT);
6041
6042
    /* Check for valid number and and space for bit. */
6043
848k
    if ((a == NULL) || (i < 0) || (w >= a->size)) {
6044
106
        err = MP_VAL;
6045
106
    }
6046
848k
    if (err == MP_OKAY) {
6047
        /* Amount to shift up to set bit in word. */
6048
848k
        unsigned int s = (unsigned int)(i & (SP_WORD_SIZE - 1));
6049
848k
        unsigned int j;
6050
6051
        /* Set to zero all unused words up to and including word to have bit
6052
         * set.
6053
         */
6054
6.65M
        for (j = a->used; j <= w; j++) {
6055
5.80M
            a->dp[j] = 0;
6056
5.80M
        }
6057
        /* Set bit in word. */
6058
848k
        a->dp[w] |= (sp_int_digit)1 << s;
6059
        /* Update used if necessary */
6060
848k
        if (a->used <= w) {
6061
848k
            a->used = (sp_size_t)(w + 1U);
6062
848k
        }
6063
848k
    }
6064
6065
848k
    return err;
6066
848k
}
6067
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
6068
        * WOLFSSL_KEY_GEN || OPENSSL_EXTRA || !NO_RSA */
6069
6070
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6071
    defined(WOLFSSL_KEY_GEN) || !defined(NO_DH)
6072
/* Exponentiate 2 to the power of e: a = 2^e
6073
 * This is done by setting the 'e'th bit.
6074
 *
6075
 * @param  [out]  a  SP integer to hold result.
6076
 * @param  [in]   e  Exponent.
6077
 *
6078
 * @return  MP_OKAY on success.
6079
 * @return  MP_VAL when a is NULL, e is negative or 2^exponent is too large.
6080
 */
6081
int sp_2expt(sp_int* a, int e)
6082
272
{
6083
272
    int err = MP_OKAY;
6084
6085
    /* Validate parameters. */
6086
272
    if ((a == NULL) || (e < 0)) {
6087
0
        err = MP_VAL;
6088
0
    }
6089
272
    if (err == MP_OKAY) {
6090
        /* Set number to zero and then set bit. */
6091
272
        _sp_zero(a);
6092
272
        err = sp_set_bit(a, e);
6093
272
    }
6094
6095
272
    return err;
6096
272
}
6097
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
6098
        * WOLFSSL_KEY_GEN || !NO_DH */
6099
6100
/**********************
6101
 * Digit/Long functions
6102
 **********************/
6103
6104
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || !defined(NO_DH) || \
6105
    defined(HAVE_ECC)
6106
/* Set the multi-precision number to be the value of the digit. No NULL check.
6107
 *
6108
 * @param  [out]  a  SP integer to become number.
6109
 * @param  [in]   d  Digit to be set.
6110
 */
6111
static void _sp_set(sp_int* a, sp_int_digit d)
6112
4.81M
{
6113
    /* Use sp_int_minimal to support allocated byte arrays as sp_ints. */
6114
4.81M
    sp_int_minimal* am = (sp_int_minimal*)a;
6115
6116
4.81M
    am->dp[0] = d;
6117
    /* d == 0 => used = 0, d > 0 => used = 1 */
6118
4.81M
    am->used = (d > 0);
6119
4.81M
#ifdef WOLFSSL_SP_INT_NEGATIVE
6120
4.81M
    am->sign = MP_ZPOS;
6121
4.81M
#endif
6122
4.81M
}
6123
6124
/* Set the multi-precision number to be the value of the digit - checks a.
6125
 *
6126
 * @param  [out]  a  SP integer to become number.
6127
 * @param  [in]   d  Digit to be set.
6128
 *
6129
 * @return  MP_OKAY on success.
6130
 * @return  MP_VAL when a is NULL.
6131
 */
6132
int sp_set(sp_int* a, sp_int_digit d)
6133
517k
{
6134
517k
    int err = MP_OKAY;
6135
6136
    /* Validate parameters - _sp_set does not check for NULL. */
6137
517k
    if (a == NULL) {
6138
0
        err = MP_VAL;
6139
0
    }
6140
517k
    if (err == MP_OKAY) {
6141
517k
        _sp_set(a, d);
6142
517k
    }
6143
6144
517k
    return err;
6145
517k
}
6146
#endif
6147
6148
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || defined(OPENSSL_EXTRA)
6149
/* Set a number into the multi-precision number.
6150
 *
6151
 * Number may be larger than the size of a digit.
6152
 *
6153
 * @param  [out]  a  SP integer to set.
6154
 * @param  [in]   n  Long value to set.
6155
 *
6156
 * @return  MP_OKAY on success.
6157
 * @return  MP_VAL when a is NULL or n needs more words than a has.
6158
 */
6159
int sp_set_int(sp_int* a, unsigned long n)
6160
92
{
6161
92
    int err = MP_OKAY;
6162
6163
92
    if (a == NULL) {
6164
0
        err = MP_VAL;
6165
0
    }
6166
6167
92
    if (err == MP_OKAY) {
6168
92
    #if SP_WORD_SIZE < SP_ULONG_BITS
6169
        /* Assign directly if value fits in one word. */
6170
92
        if (n <= (sp_int_digit)SP_DIGIT_MAX) {
6171
55
    #endif
6172
55
            a->dp[0] = (sp_int_digit)n;
6173
55
            a->used = (n != 0);
6174
55
    #if SP_WORD_SIZE < SP_ULONG_BITS
6175
55
        }
6176
37
        else {
6177
37
            unsigned int i;
6178
6179
            /* Assign value word by word. */
6180
306
            for (i = 0; (i < a->size) && (n > 0); i++,n >>= SP_WORD_SIZE) {
6181
269
                a->dp[i] = (sp_int_digit)n;
6182
269
            }
6183
            /* Update number of words used. */
6184
37
            a->used = i;
6185
            /* Check for overflow - all words filled but value bits remain. */
6186
37
            if ((i == a->size) && (n != 0)) {
6187
2
                err = MP_VAL;
6188
2
            }
6189
37
        }
6190
92
    #endif
6191
92
    #ifdef WOLFSSL_SP_INT_NEGATIVE
6192
92
        a->sign = MP_ZPOS;
6193
92
    #endif
6194
92
    }
6195
6196
92
    return err;
6197
92
}
6198
#endif /* WOLFSSL_SP_MATH_ALL || !NO_RSA || OPENSSL_EXTRA */
6199
6200
#if defined(WOLFSSL_SP_MATH_ALL) || \
6201
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6202
    !defined(NO_DH) || defined(HAVE_ECC)
6203
/* Compare a one digit number with a multi-precision number.
6204
 *
6205
 * When a is NULL, MP_LT is returned.
6206
 *
6207
 * @param  [in]  a  SP integer to compare.
6208
 * @param  [in]  d  Digit to compare with.
6209
 *
6210
 * @return  MP_GT when a is greater than d.
6211
 * @return  MP_LT when a is less than d or a is NULL.
6212
 * @return  MP_EQ when a equals d.
6213
 */
6214
int sp_cmp_d(const sp_int* a, sp_int_digit d)
6215
6.67M
{
6216
6.67M
    int ret = MP_EQ;
6217
6218
    /* No SP integer is always less - even when d is zero. */
6219
6.67M
    if (a == NULL) {
6220
0
        ret = MP_LT;
6221
0
    }
6222
6.67M
    else
6223
6.67M
#ifdef WOLFSSL_SP_INT_NEGATIVE
6224
    /* Check sign first - a negative number is always less than a digit. */
6225
6.67M
    if (a->sign == MP_NEG) {
6226
127
        ret = MP_LT;
6227
127
    }
6228
6.67M
    else
6229
6.67M
#endif
6230
6.67M
    {
6231
        /* Check if SP integer has more than one word. */
6232
6.67M
        if (a->used > 1) {
6233
1.46M
            ret = MP_GT;
6234
1.46M
        }
6235
        /* Special case for zero. */
6236
5.21M
        else if (a->used == 0) {
6237
175k
            if (d != 0) {
6238
174k
                ret = MP_LT;
6239
174k
            }
6240
            /* ret initialized to equal. */
6241
175k
        }
6242
5.03M
        else {
6243
            /* The single word in the SP integer can now be compared with d. */
6244
5.03M
            if (a->dp[0] > d) {
6245
782k
                ret = MP_GT;
6246
782k
            }
6247
4.25M
            else if (a->dp[0] < d) {
6248
28.0k
                ret = MP_LT;
6249
28.0k
            }
6250
            /* ret initialized to equal. */
6251
5.03M
        }
6252
6.67M
    }
6253
6254
6.67M
    return ret;
6255
6.67M
}
6256
#endif
6257
6258
#if defined(WOLFSSL_SP_ADD_D) || (defined(WOLFSSL_SP_INT_NEGATIVE) && \
6259
    defined(WOLFSSL_SP_SUB_D)) || defined(WOLFSSL_SP_READ_RADIX_10)
6260
/* Add a one digit number to the magnitude of the multi-precision number.
6261
 *
6262
 * @param  [in]   a  SP integer to be added to.
6263
 * @param  [in]   d  Digit to add.
6264
 * @param  [out]  r  SP integer to store result in. May be the same as a.
6265
 *
6266
 * @return  MP_OKAY on success.
6267
 * @return  MP_VAL when result is too large for fixed size dp array.
6268
 */
6269
static int _sp_add_d(const sp_int* a, sp_int_digit d, sp_int* r)
6270
6.11M
{
6271
6.11M
    int err = MP_OKAY;
6272
6273
    /* Special case of zero means we want result to have a digit when not adding
6274
     * zero. */
6275
6.11M
    if (a->used == 0) {
6276
73.5k
        r->dp[0] = d;
6277
73.5k
        r->used = (d > 0);
6278
73.5k
    }
6279
6.04M
    else {
6280
6.04M
        unsigned int i = 0;
6281
6.04M
        sp_int_digit a0 = a->dp[0];
6282
6283
        /* Set used of result - updated if overflow seen. */
6284
6.04M
        r->used = a->used;
6285
6286
6.04M
        r->dp[0] = a0 + d;
6287
        /* Check for carry - sum wrapped around when smaller than a0. */
6288
6.04M
        if (r->dp[0] < a0) {
6289
            /* Propagate carry until a word doesn't wrap around to zero. */
6290
24.0k
            for (++i; i < a->used; i++) {
6291
23.2k
                r->dp[i] = a->dp[i] + 1;
6292
23.2k
                if (r->dp[i] != 0) {
6293
14.0k
                   break;
6294
14.0k
                }
6295
23.2k
            }
6296
            /* Add another word if carry went through all words. */
6297
14.7k
            if (i == a->used) {
6298
                /* Check result has enough space for another word. */
6299
751
                if (i < r->size) {
6300
751
                    r->used++;
6301
751
                    r->dp[i] = 1;
6302
751
                }
6303
0
                else {
6304
0
                    err = MP_VAL;
6305
0
                }
6306
751
            }
6307
14.7k
        }
6308
        /* When result is not the same as input, copy rest of digits. */
6309
6.04M
        if ((err == MP_OKAY) && (r != a)) {
6310
            /* Copy any words that didn't update with carry. */
6311
12.5k
            for (++i; i < a->used; i++) {
6312
9.95k
                r->dp[i] = a->dp[i];
6313
9.95k
            }
6314
2.55k
        }
6315
6.04M
    }
6316
6317
6.11M
    return err;
6318
6.11M
}
6319
#endif /* WOLFSSL_SP_ADD_D || (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_SUB_D) ||
6320
        * defined(WOLFSSL_SP_READ_RADIX_10) */
6321
6322
#if (defined(WOLFSSL_SP_INT_NEGATIVE) && defined(WOLFSSL_SP_ADD_D)) || \
6323
    defined(WOLFSSL_SP_SUB_D) || defined(WOLFSSL_SP_INVMOD) || \
6324
    defined(WOLFSSL_SP_INVMOD_MONT_CT) || (defined(WOLFSSL_SP_PRIME_GEN) && \
6325
    !defined(WC_NO_RNG))
6326
/* Subtract a one digit number from the magnitude of the multi-precision number.
6327
 *
6328
 * @param  [in]   a  SP integer to be subtracted from.
6329
 * @param  [in]   d  Digit to subtract.
6330
 * @param  [out]  r  SP integer to store result in. May be the same as a.
6331
 */
6332
static void _sp_sub_d(const sp_int* a, sp_int_digit d, sp_int* r)
6333
72.7k
{
6334
    /* Set result used to be same as input. Updated with clamp. */
6335
72.7k
    r->used = a->used;
6336
    /* Only possible when not handling negatives. */
6337
72.7k
    if (a->used == 0) {
6338
        /* Set result to zero as no negative support. */
6339
21
        r->dp[0] = 0;
6340
21
    }
6341
72.7k
    else {
6342
72.7k
        unsigned int i = 0;
6343
72.7k
        sp_int_digit a0 = a->dp[0];
6344
6345
72.7k
        r->dp[0] = a0 - d;
6346
        /* Check for borrow - difference wrapped around when bigger than a0. */
6347
72.7k
        if (r->dp[0] > a0) {
6348
            /* Propagate borrow until a word doesn't wrap around to maximum. */
6349
8.87k
            for (++i; i < a->used; i++) {
6350
8.85k
                r->dp[i] = a->dp[i] - 1;
6351
8.85k
                if (r->dp[i] != SP_DIGIT_MAX) {
6352
2.17k
                   break;
6353
2.17k
                }
6354
8.85k
            }
6355
2.20k
        }
6356
        /* When result is not the same as input, copy rest of digits. */
6357
72.7k
        if (r != a) {
6358
            /* Copy any words that didn't update with borrow. */
6359
336k
            for (++i; i < a->used; i++) {
6360
280k
                r->dp[i] = a->dp[i];
6361
280k
            }
6362
56.5k
        }
6363
        /* Remove leading zero words. */
6364
72.7k
        sp_clamp(r);
6365
72.7k
    }
6366
72.7k
}
6367
#endif /* (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_ADD_D) || WOLFSSL_SP_SUB_D ||
6368
        * WOLFSSL_SP_INVMOD || WOLFSSL_SP_INVMOD_MONT_CT ||
6369
        * (WOLFSSL_SP_PRIME_GEN && !WC_NO_RNG) */
6370
6371
#ifdef WOLFSSL_SP_ADD_D
6372
/* Add a one digit number to the multi-precision number.
6373
 *
6374
 * @param  [in]   a  SP integer to be added to.
6375
 * @param  [in]   d  Digit to add.
6376
 * @param  [out]  r  SP integer to store result in.
6377
 *
6378
 * @return  MP_OKAY on success.
6379
 * @return  MP_VAL when a or r is NULL, or result may not fit in r.
6380
 */
6381
int sp_add_d(const sp_int* a, sp_int_digit d, sp_int* r)
6382
45.5k
{
6383
45.5k
    int err = MP_OKAY;
6384
6385
    /* Check validity of parameters. */
6386
45.5k
    if ((a == NULL) || (r == NULL)) {
6387
0
        err = MP_VAL;
6388
0
    }
6389
6390
#ifndef WOLFSSL_SP_INT_NEGATIVE
6391
    /* Check for space in result especially when carry adds a new word. */
6392
    if ((err == MP_OKAY) && (a->used + 1 > r->size)) {
6393
         err = MP_VAL;
6394
    }
6395
    if (err == MP_OKAY) {
6396
        /* Positive only so just use internal function. */
6397
        err = _sp_add_d(a, d, r);
6398
    }
6399
#else
6400
    /* Check for space in result especially when carry adds a new word. */
6401
45.5k
    if ((err == MP_OKAY) && (a->sign == MP_ZPOS) && (a->used + 1 > r->size)) {
6402
13
         err = MP_VAL;
6403
13
    }
6404
    /* Check for space in result - no carry but borrow possible. */
6405
45.5k
    if ((err == MP_OKAY) && (a->sign == MP_NEG) && (a->used > r->size)) {
6406
9
         err = MP_VAL;
6407
9
    }
6408
45.5k
    if (err == MP_OKAY) {
6409
45.5k
        if (a->sign == MP_ZPOS) {
6410
            /* Positive, so use internal function. */
6411
45.3k
            r->sign = MP_ZPOS;
6412
45.3k
            err = _sp_add_d(a, d, r);
6413
45.3k
        }
6414
174
        else if ((a->used > 1) || (a->dp[0] > d)) {
6415
            /* Negative with magnitude bigger than digit so subtract digit. */
6416
98
            r->sign = MP_NEG;
6417
98
            _sp_sub_d(a, d, r);
6418
98
        }
6419
76
        else {
6420
            /* Negative with magnitude smaller than or equal to digit. */
6421
76
            r->sign = MP_ZPOS;
6422
            /* Subtract magnitude of negative value from digit. */
6423
76
            r->dp[0] = d - a->dp[0];
6424
            /* Result is a digit equal to or greater than zero. */
6425
76
            r->used = (r->dp[0] > 0);
6426
76
        }
6427
45.5k
    }
6428
45.5k
#endif
6429
6430
45.5k
    return err;
6431
45.5k
}
6432
#endif /* WOLFSSL_SP_ADD_D */
6433
6434
#ifdef WOLFSSL_SP_SUB_D
6435
/* Subtract a one digit number from the multi-precision number.
6436
 *
6437
 * @param  [in]   a  SP integer to be subtracted from.
6438
 * @param  [in]   d  Digit to subtract.
6439
 * @param  [out]  r  SP integer to store result in.
6440
 *
6441
 * @return  MP_OKAY on success.
6442
 * @return  MP_VAL when a or r is NULL, or result may not fit in r.
6443
 */
6444
int sp_sub_d(const sp_int* a, sp_int_digit d, sp_int* r)
6445
40.8k
{
6446
40.8k
    int err = MP_OKAY;
6447
6448
    /* Check validity of parameters. */
6449
40.8k
    if ((a == NULL) || (r == NULL)) {
6450
0
        err = MP_VAL;
6451
0
    }
6452
#ifndef WOLFSSL_SP_INT_NEGATIVE
6453
    /* Check for space in result. */
6454
    if ((err == MP_OKAY) && (a->used > r->size)) {
6455
         err = MP_VAL;
6456
    }
6457
    if (err == MP_OKAY) {
6458
        /* Positive only so just use internal function. */
6459
        _sp_sub_d(a, d, r);
6460
    }
6461
#else
6462
    /* Check for space in result especially when carry adds a new word. */
6463
40.8k
    if ((err == MP_OKAY) && (a->sign == MP_NEG) && (a->used + 1 > r->size)) {
6464
8
         err = MP_VAL;
6465
8
    }
6466
    /* Check for space in result - no carry but borrow possible. */
6467
40.8k
    if ((err == MP_OKAY) && (a->sign == MP_ZPOS) && (a->used > r->size)) {
6468
8
         err = MP_VAL;
6469
8
    }
6470
40.8k
    if (err == MP_OKAY) {
6471
40.8k
        if (a->sign == MP_NEG) {
6472
            /* Subtracting from negative - add digit to magnitude. */
6473
100
            r->sign = MP_NEG;
6474
100
            err = _sp_add_d(a, d, r);
6475
100
        }
6476
40.7k
        else if ((a->used > 1) || (a->dp[0] >= d)) {
6477
            /* Positive number greater than or equal to digit - subtract digit.
6478
             */
6479
40.7k
            r->sign = MP_ZPOS;
6480
40.7k
            _sp_sub_d(a, d, r);
6481
40.7k
        }
6482
13
        else {
6483
            /* Positive value smaller than digit. */
6484
13
            r->sign = MP_NEG;
6485
            /* Subtract positive value from digit. */
6486
13
            r->dp[0] = d - a->dp[0];
6487
            /* Result is a non-zero digit as value is smaller than digit. */
6488
13
            r->used = 1;
6489
13
        }
6490
40.8k
    }
6491
40.8k
#endif
6492
6493
40.8k
    return err;
6494
40.8k
}
6495
#endif /* WOLFSSL_SP_SUB_D */
6496
6497
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6498
    defined(WOLFSSL_SP_SMALL) && (defined(WOLFSSL_SP_MATH_ALL) || \
6499
    !defined(NO_DH) || defined(HAVE_ECC) || \
6500
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
6501
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
6502
    (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA)) || \
6503
    defined(WOLFSSL_SP_MUL_D)
6504
/* Multiply a by digit d and put result into r shifting up o digits.
6505
 *   r = (a * d) << (o * SP_WORD_SIZE)
6506
 *
6507
 * @param  [in]   a  SP integer to be multiplied.
6508
 * @param  [in]   d  SP digit to multiply by.
6509
 * @param  [out]  r  SP integer result.
6510
 * @param  [in]   o  Number of digits to move result up by.
6511
 * @return  MP_OKAY on success.
6512
 * @return  MP_VAL when result is too large for sp_int.
6513
 */
6514
static int _sp_mul_d(const sp_int* a, sp_int_digit d, sp_int* r, unsigned int o)
6515
6.06M
{
6516
6.06M
    int err = MP_OKAY;
6517
6.06M
    unsigned int i;
6518
6.06M
#ifndef SQR_MUL_ASM
6519
6.06M
    sp_int_word t = 0;
6520
#else
6521
    sp_int_digit l = 0;
6522
    sp_int_digit h = 0;
6523
#endif
6524
6525
#ifdef WOLFSSL_SP_SMALL
6526
    /* Zero out offset words. */
6527
    for (i = 0; i < o; i++) {
6528
        r->dp[i] = 0;
6529
    }
6530
#else
6531
    /* Don't use the offset. Only when doing small code size div. */
6532
6.06M
    (void)o;
6533
6.06M
#endif
6534
6535
    /* Multiply each word of a by d. */
6536
235M
    for (i = 0; i < a->used; i++, o++) {
6537
229M
    #ifndef SQR_MUL_ASM
6538
        /* Add product to top word of previous result. */
6539
229M
        t += (sp_int_word)a->dp[i] * d;
6540
        /* Store low word. */
6541
229M
        r->dp[o] = (sp_int_digit)t;
6542
        /* Move top word down. */
6543
229M
        t >>= SP_WORD_SIZE;
6544
    #else
6545
        /* Multiply and add into low and high from previous result.
6546
         * No overflow possible with add. */
6547
        SP_ASM_MUL_ADD_NO(l, h, a->dp[i], d);
6548
        /* Store low word. */
6549
        r->dp[o] = l;
6550
        /* Move high word into low word and set high word to 0. */
6551
        l = h;
6552
        h = 0;
6553
    #endif
6554
229M
    }
6555
6556
    /* Check whether new word to be appended to result. */
6557
6.06M
#ifndef SQR_MUL_ASM
6558
6.06M
    if (t > 0)
6559
#else
6560
    if (l > 0)
6561
#endif
6562
649k
    {
6563
        /* Validate space available in result. */
6564
649k
        if (o == r->size) {
6565
102
            err = MP_VAL;
6566
102
        }
6567
649k
        else {
6568
            /* Store new top word. */
6569
649k
        #ifndef SQR_MUL_ASM
6570
649k
            r->dp[o++] = (sp_int_digit)t;
6571
        #else
6572
            r->dp[o++] = l;
6573
        #endif
6574
649k
        }
6575
649k
    }
6576
    /* Update number of words in result. */
6577
6.06M
    r->used = (sp_size_t)o;
6578
    /* In case d is zero. */
6579
6.06M
    sp_clamp(r);
6580
6581
6.06M
    return err;
6582
6.06M
}
6583
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
6584
        *  WOLFSSL_SP_SMALL || (WOLFSSL_KEY_GEN && !NO_RSA) || WOLFSSL_SP_MUL_D */
6585
6586
#ifdef WOLFSSL_SP_MUL_D
6587
/* Multiply a by digit d and put result into r. r = a * d
6588
 *
6589
 * @param  [in]   a  SP integer to multiply.
6590
 * @param  [in]   d  Digit to multiply by.
6591
 * @param  [out]  r  SP integer to hold result.
6592
 *
6593
 * @return  MP_OKAY on success.
6594
 * @return  MP_VAL when a or r is NULL, or result is too large for r.
6595
 */
6596
int sp_mul_d(const sp_int* a, sp_int_digit d, sp_int* r)
6597
218
{
6598
218
    int err = MP_OKAY;
6599
6600
    /* Validate parameters. */
6601
218
    if ((a == NULL) || (r == NULL)) {
6602
0
        err = MP_VAL;
6603
0
    }
6604
    /* Check space for product result - _sp_mul_d checks when new word added. */
6605
218
    if ((err == MP_OKAY) && (a->used > r->size)) {
6606
5
        err = MP_VAL;
6607
5
    }
6608
6609
218
    if (err == MP_OKAY) {
6610
213
        err = _sp_mul_d(a, d, r, 0);
6611
213
    #ifdef WOLFSSL_SP_INT_NEGATIVE
6612
        /* Update sign - product with zero digit is never negative. */
6613
213
        if (d == 0) {
6614
29
            r->sign = MP_ZPOS;
6615
29
        }
6616
184
        else {
6617
184
            r->sign = a->sign;
6618
184
        }
6619
213
    #endif
6620
213
    }
6621
6622
218
    return err;
6623
218
}
6624
#endif /* WOLFSSL_SP_MUL_D */
6625
6626
/* Predefine the complicated rules of when sp_div_d/sp_mod_d are compiled in. */
6627
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6628
    defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
6629
    defined(OPENSSL_EXTRA) || defined(WC_MP_TO_RADIX)
6630
#define WOLFSSL_SP_DIV_D
6631
#endif
6632
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6633
    !defined(NO_DH) || \
6634
    (defined(HAVE_ECC) && (defined(FP_ECC) || defined(HAVE_COMP_KEY))) || \
6635
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
6636
#define WOLFSSL_SP_MOD_D
6637
#endif
6638
6639
#if (defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
6640
     (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
6641
      !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
6642
    defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
6643
#ifndef SP_ASM_DIV_WORD
6644
/* Divide a two digit number by a digit number and return. (hi | lo) / d
6645
 *
6646
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
6647
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
6648
 * @param  [in]  d   SP integer digit. Number to divide by.
6649
 * @return  The division result (quotient) as a single digit.
6650
 */
6651
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
6652
    sp_int_digit d)
6653
57.9M
{
6654
#ifdef WOLFSSL_SP_DIV_WORD_HALF
6655
    sp_int_digit r;
6656
6657
    /* Trial division using half of the bits in d. */
6658
6659
    /* Check for shortcut when no high word set. */
6660
    if (hi == 0) {
6661
        r = lo / d;
6662
    }
6663
    else {
6664
        /* Half the bits of d. */
6665
        sp_int_digit divh = d >> SP_HALF_SIZE;
6666
        /* Number to divide in one value. */
6667
        sp_int_word w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
6668
        sp_int_word trial;
6669
        sp_int_digit r2;
6670
6671
        /* Calculation for top SP_WORD_SIZE / 2 bits of dividend. */
6672
        /* Divide high word by top half of divisor. */
6673
        r = hi / divh;
6674
        /* When result too big then assume only max value. */
6675
        if (r > SP_HALF_MAX) {
6676
            r = SP_HALF_MAX;
6677
        }
6678
        /* Shift up result for trial division calculation. */
6679
        r <<= SP_HALF_SIZE;
6680
        /* Calculate trial value. */
6681
        trial = r * (sp_int_word)d;
6682
        /* Decrease r while trial is too big. */
6683
        while (trial > w) {
6684
            r -= (sp_int_digit)1 << SP_HALF_SIZE;
6685
            trial -= (sp_int_word)d << SP_HALF_SIZE;
6686
        }
6687
        /* Subtract trial. */
6688
        w -= trial;
6689
6690
        /* Calculation for remaining second SP_WORD_SIZE / 2 bits. */
6691
        /* Divide top SP_WORD_SIZE of remainder by top half of divisor. */
6692
        r2 = ((sp_int_digit)(w >> SP_HALF_SIZE)) / divh;
6693
        /* Calculate trial value. */
6694
        trial = r2 * (sp_int_word)d;
6695
        /* Decrease r2 while trial is too big. */
6696
        while (trial > w) {
6697
            r2--;
6698
            trial -= d;
6699
        }
6700
        /* Subtract trial. */
6701
        w -= trial;
6702
        /* Update result. */
6703
        r += r2;
6704
6705
        /* Calculation for remaining bottom SP_WORD_SIZE bits. */
6706
        r2 = ((sp_int_digit)w) / d;
6707
        /* Update result. */
6708
        r += r2;
6709
    }
6710
6711
    return r;
6712
#else
6713
57.9M
    sp_int_word w;
6714
57.9M
    sp_int_digit r;
6715
6716
    /* Use built-in divide on the double width value. */
6717
57.9M
    w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
6718
57.9M
    w /= d;
6719
57.9M
    r = (sp_int_digit)w;
6720
6721
57.9M
    return r;
6722
57.9M
#endif /* WOLFSSL_SP_DIV_WORD_HALF */
6723
57.9M
}
6724
#endif /* !SP_ASM_DIV_WORD */
6725
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
6726
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
6727
6728
#if (defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)) && \
6729
    !defined(WOLFSSL_SP_SMALL)
6730
6731
#if SP_WORD_SIZE == 64
6732
    /* floor(2^64 / 3) */
6733
    #define SP_DIV_3_CONST      0x5555555555555555L
6734
    /* floor(2^64 / 10) */
6735
    #define SP_DIV_10_CONST     0x1999999999999999L
6736
#elif SP_WORD_SIZE == 32
6737
    /* floor(2^32 / 3) */
6738
    #define SP_DIV_3_CONST      0x55555555
6739
    /* floor(2^32 / 10) */
6740
    #define SP_DIV_10_CONST     0x19999999
6741
#elif SP_WORD_SIZE == 16
6742
    /* floor(2^16 / 3) */
6743
    #define SP_DIV_3_CONST      0x5555
6744
    /* floor(2^16 / 10) */
6745
    #define SP_DIV_10_CONST     0x1999
6746
#elif SP_WORD_SIZE == 8
6747
    /* floor(2^8 / 3) */
6748
7.13k
    #define SP_DIV_3_CONST      0x55
6749
    /* floor(2^8 / 10) */
6750
98.2M
    #define SP_DIV_10_CONST     0x19
6751
#endif
6752
6753
#if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE < 64)
6754
/* Divide by 3: r = a / 3 and rem = a % 3
6755
 *
6756
 * Used in checking prime: (a % 3) == 0?.
6757
 *
6758
 * @param  [in]   a    SP integer to be divided.
6759
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
6760
 * @param  [out]  rem  Digit that is the remainder. Must not be NULL if r is.
6761
 */
6762
static void _sp_div_3(const sp_int* a, sp_int* r, sp_int_digit* rem)
6763
7.04k
{
6764
7.04k
#ifndef SQR_MUL_ASM
6765
7.04k
    sp_int_word t;
6766
7.04k
    sp_int_digit tt;
6767
#else
6768
    sp_int_digit l = 0;
6769
    sp_int_digit tt = 0;
6770
    sp_int_digit t = SP_DIV_3_CONST;
6771
    sp_int_digit lm = 0;
6772
    sp_int_digit hm = 0;
6773
#endif
6774
7.04k
    sp_int_digit tr = 0;
6775
    /* Quotient fixup - indexed by trial remainder 0..5. */
6776
7.04k
    static const unsigned char sp_r6[6] = { 0, 0, 0, 1, 1, 1 };
6777
    /* Remainder fixup - indexed by trial remainder 0..5. */
6778
7.04k
    static const unsigned char sp_rem6[6] = { 0, 1, 2, 0, 1, 2 };
6779
6780
    /* Check whether only mod value needed. */
6781
7.04k
    if (r == NULL) {
6782
7.03k
        unsigned int i;
6783
6784
        /*    2^2 mod 3 = 4 mod 3 = 1.
6785
         * => 2^(2*n) mod 3 = (2^2 mod 3)^n mod 3 = 1^n mod 3 = 1
6786
         * => (2^(2*n) * x) mod 3 = (2^(2*n) mod 3) * (x mod 3) = x mod 3
6787
         *
6788
         * Calculate mod 3 on sum of digits as SP_WORD_SIZE is a multiple of 2.
6789
         */
6790
7.03k
    #ifndef SQR_MUL_ASM
6791
7.03k
        t = 0;
6792
        /* Sum the digits. */
6793
261k
        for (i = 0; i < a->used; i++) {
6794
254k
            t += a->dp[i];
6795
254k
        }
6796
        /* Sum digits of sum. */
6797
7.03k
        t = (t >> SP_WORD_SIZE) + (t & SP_MASK);
6798
        /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 3. */
6799
7.03k
        tt = (sp_int_digit)((t * SP_DIV_3_CONST) >> SP_WORD_SIZE);
6800
        /* Subtract trial division. */
6801
7.03k
        tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
6802
    #else
6803
        /* Sum the digits. */
6804
        for (i = 0; i < a->used; i++) {
6805
            SP_ASM_ADDC_REG(l, tr, a->dp[i]);
6806
        }
6807
        /* Sum digits of sum - can get carry. */
6808
        SP_ASM_ADDC_REG(l, tt, tr);
6809
        /* Multiply digit by (2^SP_WORD_SIZE) / 3. */
6810
        SP_ASM_MUL(lm, hm, l, t);
6811
        /* Add remainder multiplied by (2^SP_WORD_SIZE) / 3 to top digit. */
6812
        hm += tt * SP_DIV_3_CONST;
6813
        /* Subtract trial division from digit. */
6814
        tr = l - (hm * 3);
6815
    #endif
6816
        /* tr is 0..5 but need 0..2 */
6817
        /* Fix up remainder. */
6818
7.03k
        tr = sp_rem6[tr];
6819
7.03k
        *rem = tr;
6820
7.03k
    }
6821
    /* At least result needed - remainder is calculated anyway. */
6822
12
    else {
6823
12
        int i;
6824
6825
        /* Divide starting at most significant word down to least. */
6826
105
        for (i = (int)(a->used - 1); i >= 0; i--) {
6827
93
    #ifndef SQR_MUL_ASM
6828
            /* Combine remainder from last operation with this word. */
6829
93
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
6830
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 3. */
6831
93
            tt = (sp_int_digit)((t * SP_DIV_3_CONST) >> SP_WORD_SIZE);
6832
            /* Subtract trial division. */
6833
93
            tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
6834
    #else
6835
            /* Multiply digit by (2^SP_WORD_SIZE) / 3. */
6836
            SP_ASM_MUL(l, tt, a->dp[i], t);
6837
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 3 to top digit. */
6838
            tt += tr * SP_DIV_3_CONST;
6839
            /* Subtract trial division from digit. */
6840
            tr = a->dp[i] - (tt * 3);
6841
    #endif
6842
            /* tr is 0..5 but need 0..2 */
6843
            /* Fix up result. */
6844
93
            tt += sp_r6[tr];
6845
            /* Fix up remainder. */
6846
93
            tr = sp_rem6[tr];
6847
            /* Store result of digit divided by 3. */
6848
93
            r->dp[i] = tt;
6849
93
        }
6850
6851
        /* Set the used amount to maximal amount. */
6852
12
        r->used = a->used;
6853
        /* Remove leading zeros. */
6854
12
        sp_clamp(r);
6855
        /* Return remainder if required. */
6856
12
        if (rem != NULL) {
6857
6
            *rem = tr;
6858
6
        }
6859
12
    }
6860
7.04k
}
6861
#endif /* !WOLFSSL_SP_SMALL && (SP_WORD_SIZE < 64) */
6862
6863
/* Divide by 10: r = a / 10 and rem = a % 10
6864
 *
6865
 * Used when writing with a radix of 10 - decimal number.
6866
 *
6867
 * @param  [in]   a    SP integer to be divided.
6868
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
6869
 * @param  [out]  rem  Digit that is the remainder. Must not be NULL if r is.
6870
 */
6871
static void _sp_div_10(const sp_int* a, sp_int* r, sp_int_digit* rem)
6872
5.55M
{
6873
5.55M
    int i;
6874
5.55M
#ifndef SQR_MUL_ASM
6875
5.55M
    sp_int_word t;
6876
5.55M
    sp_int_digit tt;
6877
#else
6878
    sp_int_digit l = 0;
6879
    sp_int_digit tt = 0;
6880
    sp_int_digit t = SP_DIV_10_CONST;
6881
#endif
6882
5.55M
    sp_int_digit tr = 0;
6883
6884
    /* Check whether only mod value needed. */
6885
5.55M
    if (r == NULL) {
6886
        /* Divide starting at most significant word down to least. */
6887
661
        for (i = (int)(a->used - 1); i >= 0; i--) {
6888
583
    #ifndef SQR_MUL_ASM
6889
            /* Combine remainder from last operation with this word. */
6890
583
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
6891
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 10. */
6892
583
            tt = (sp_int_digit)((t * SP_DIV_10_CONST) >> SP_WORD_SIZE);
6893
            /* Subtract trial division. */
6894
583
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
6895
    #else
6896
            /* Multiply digit by (2^SP_WORD_SIZE) / 10. */
6897
            SP_ASM_MUL(l, tt, a->dp[i], t);
6898
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 10 to top digit.
6899
             */
6900
            tt += tr * SP_DIV_10_CONST;
6901
            /* Subtract trial division from digit. */
6902
            tr = a->dp[i] - (tt * 10);
6903
    #endif
6904
            /* tr is 0..99 but need 0..9 */
6905
            /* Fix up remainder. */
6906
583
            tr = tr % 10;
6907
583
        }
6908
78
        *rem = tr;
6909
78
    }
6910
    /* At least result needed - remainder is calculated anyway. */
6911
5.55M
    else {
6912
        /* Divide starting at most significant word down to least. */
6913
103M
        for (i = (int)(a->used - 1); i >= 0; i--) {
6914
98.2M
    #ifndef SQR_MUL_ASM
6915
            /* Combine remainder from last operation with this word. */
6916
98.2M
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
6917
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 10. */
6918
98.2M
            tt = (sp_int_digit)((t * SP_DIV_10_CONST) >> SP_WORD_SIZE);
6919
            /* Subtract trial division. */
6920
98.2M
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
6921
    #else
6922
            /* Multiply digit by (2^SP_WORD_SIZE) / 10. */
6923
            SP_ASM_MUL(l, tt, a->dp[i], t);
6924
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 10 to top digit.
6925
             */
6926
            tt += tr * SP_DIV_10_CONST;
6927
            /* Subtract trial division from digit. */
6928
            tr = a->dp[i] - (tt * 10);
6929
    #endif
6930
            /* tr is 0..99 but need 0..9 */
6931
            /* Fix up result - add in the tens of the trial remainder. */
6932
98.2M
            tt += tr / 10;
6933
            /* Fix up remainder. */
6934
98.2M
            tr %= 10;
6935
            /* Store result of digit divided by 10. */
6936
98.2M
            r->dp[i] = tt;
6937
98.2M
        }
6938
6939
        /* Set the used amount to maximal amount. */
6940
5.55M
        r->used = a->used;
6941
        /* Remove leading zeros. */
6942
5.55M
        sp_clamp(r);
6943
        /* Return remainder if required. */
6944
5.55M
        if (rem != NULL) {
6945
5.55M
            *rem = tr;
6946
5.55M
        }
6947
5.55M
    }
6948
5.55M
}
6949
#endif /* (WOLFSSL_SP_DIV_D || WOLFSSL_SP_MOD_D) && !WOLFSSL_SP_SMALL */
6950
6951
#if defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
6952
/* Divide by small number: r = a / d and rem = a % d
6953
 *
6954
 * @param  [in]   a    SP integer to be divided.
6955
 * @param  [in]   d    Digit to divide by.
6956
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
6957
 * @param  [out]  rem  SP integer that is the remainder. May be NULL.
6958
 */
6959
static void _sp_div_small(const sp_int* a, sp_int_digit d, sp_int* r,
6960
    sp_int_digit* rem)
6961
15.7k
{
6962
15.7k
    int i;
6963
15.7k
#ifndef SQR_MUL_ASM
6964
15.7k
    sp_int_word t;
6965
15.7k
    sp_int_digit tt;
6966
#else
6967
    sp_int_digit l = 0;
6968
    sp_int_digit tt = 0;
6969
#endif
6970
15.7k
    sp_int_digit tr = 0;
6971
15.7k
    sp_int_digit m = SP_DIGIT_MAX / d;
6972
6973
15.7k
#ifndef WOLFSSL_SP_SMALL
6974
    /* Check whether only mod value needed. */
6975
15.7k
    if (r == NULL) {
6976
        /* Divide starting at most significant word down to least. */
6977
545k
        for (i = (int)(a->used - 1); i >= 0; i--) {
6978
529k
        #ifndef SQR_MUL_ASM
6979
            /* Combine remainder from last operation with this word. */
6980
529k
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
6981
            /* Get top digit after multiplying. */
6982
529k
            tt = (sp_int_digit)((t * m) >> SP_WORD_SIZE);
6983
            /* Subtract trial division. */
6984
529k
            tr = (sp_int_digit)t - (sp_int_digit)(tt * d);
6985
        #else
6986
            /* Multiply digit. */
6987
            SP_ASM_MUL(l, tt, a->dp[i], m);
6988
            /* Add multiplied remainder to top digit. */
6989
            tt += tr * m;
6990
            /* Subtract trial division from digit. */
6991
            tr = a->dp[i] - (tt * d);
6992
        #endif
6993
            /* tr < d * d */
6994
            /* Fix up remainder. */
6995
529k
            tr = tr % d;
6996
529k
        }
6997
15.6k
        *rem = tr;
6998
15.6k
    }
6999
    /* At least result needed - remainder is calculated anyway. */
7000
100
    else
7001
100
#endif /* !WOLFSSL_SP_SMALL */
7002
100
    {
7003
        /* Divide starting at most significant word down to least. */
7004
1.14k
        for (i = (int)(a->used - 1); i >= 0; i--) {
7005
1.04k
        #ifndef SQR_MUL_ASM
7006
            /* Combine remainder from last operation with this word. */
7007
1.04k
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
7008
            /* Get top digit after multiplying. */
7009
1.04k
            tt = (sp_int_digit)((t * m) >> SP_WORD_SIZE);
7010
            /* Subtract trial division. */
7011
1.04k
            tr = (sp_int_digit)t - (sp_int_digit)(tt * d);
7012
        #else
7013
            /* Multiply digit. */
7014
            SP_ASM_MUL(l, tt, a->dp[i], m);
7015
            /* Add multiplied remainder to top digit. */
7016
            tt += tr * m;
7017
            /* Subtract trial division from digit. */
7018
            tr = a->dp[i] - (tt * d);
7019
        #endif
7020
            /* tr < d * d */
7021
            /* Fix up result. */
7022
1.04k
            tt += tr / d;
7023
            /* Fix up remainder. */
7024
1.04k
            tr %= d;
7025
            /* Store result of dividing the digit. */
7026
        #ifdef WOLFSSL_SP_SMALL
7027
            if (r != NULL)
7028
        #endif
7029
1.04k
            {
7030
1.04k
                r->dp[i] = tt;
7031
1.04k
            }
7032
1.04k
        }
7033
7034
    #ifdef WOLFSSL_SP_SMALL
7035
        if (r != NULL)
7036
    #endif
7037
100
        {
7038
            /* Set the used amount to maximal amount. */
7039
100
            r->used = a->used;
7040
            /* Remove leading zeros. */
7041
100
            sp_clamp(r);
7042
100
        }
7043
        /* Return remainder if required. */
7044
100
        if (rem != NULL) {
7045
43
            *rem = tr;
7046
43
        }
7047
100
    }
7048
15.7k
}
7049
#endif
7050
7051
#ifdef WOLFSSL_SP_DIV_D
7052
/* Divide a multi-precision number by a digit size number and calculate
7053
 * remainder.
7054
 *   r = a / d; rem = a % d
7055
 *
7056
 * Use trial division algorithm.
7057
 *
7058
 * @param  [in]   a    SP integer to be divided.
7059
 * @param  [in]   d    Digit to divide by.
7060
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
7061
 * @param  [out]  rem  Digit that is the remainder. May be NULL.
7062
 */
7063
static void _sp_div_d(const sp_int* a, sp_int_digit d, sp_int* r,
7064
    sp_int_digit* rem)
7065
177
{
7066
177
    int i;
7067
177
#ifndef SQR_MUL_ASM
7068
177
    sp_int_word w = 0;
7069
#else
7070
    sp_int_digit l;
7071
    sp_int_digit h = 0;
7072
#endif
7073
177
    sp_int_digit t;
7074
7075
    /* Divide starting at most significant word down to least. */
7076
1.43k
    for (i = (int)(a->used - 1); i >= 0; i--) {
7077
1.26k
    #ifndef SQR_MUL_ASM
7078
        /* Combine remainder from last operation with this word and divide. */
7079
1.26k
        t = sp_div_word((sp_int_digit)w, a->dp[i], d);
7080
        /* Combine remainder from last operation with this word. */
7081
1.26k
        w = (w << SP_WORD_SIZE) | a->dp[i];
7082
        /* Subtract to get modulo result. */
7083
1.26k
        w -= (sp_int_word)t * d;
7084
    #else
7085
        /* Get current word. */
7086
        l = a->dp[i];
7087
        /* Combine remainder from last operation with this word and divide. */
7088
        t = sp_div_word(h, l, d);
7089
        /* Subtract to get modulo result. */
7090
        h = l - t * d;
7091
    #endif
7092
        /* Store result of dividing the digit. */
7093
1.26k
        if (r != NULL) {
7094
1.26k
            r->dp[i] = t;
7095
1.26k
        }
7096
1.26k
    }
7097
177
    if (r != NULL) {
7098
        /* Set the used amount to maximal amount. */
7099
177
        r->used = a->used;
7100
        /* Remove leading zeros. */
7101
177
        sp_clamp(r);
7102
177
    }
7103
7104
    /* Return remainder if required. */
7105
177
    if (rem != NULL) {
7106
80
    #ifndef SQR_MUL_ASM
7107
80
        *rem = (sp_int_digit)w;
7108
    #else
7109
        *rem = h;
7110
    #endif
7111
80
    }
7112
177
}
7113
7114
/* Divide a multi-precision number by a digit size number and calculate
7115
 * remainder.
7116
 *   r = a / d; rem = a % d
7117
 *
7118
 * @param  [in]   a    SP integer to be divided.
7119
 * @param  [in]   d    Digit to divide by.
7120
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
7121
 * @param  [out]  rem  Digit that is the remainder. May be NULL.
7122
 *
7123
 * @return  MP_OKAY on success.
7124
 * @return  MP_VAL when a is NULL or d is 0.
7125
 */
7126
int sp_div_d(const sp_int* a, sp_int_digit d, sp_int* r, sp_int_digit* rem)
7127
883k
{
7128
883k
    int err = MP_OKAY;
7129
7130
    /* Validate parameters. */
7131
883k
    if ((a == NULL) || (d == 0)) {
7132
7
        err = MP_VAL;
7133
7
    }
7134
    /* Check space for maximal sized result. */
7135
883k
    if ((err == MP_OKAY) && (r != NULL) && (a->used > r->size)) {
7136
2
        err = MP_VAL;
7137
2
    }
7138
7139
883k
    if (err == MP_OKAY) {
7140
883k
#if !defined(WOLFSSL_SP_SMALL)
7141
883k
    #if SP_WORD_SIZE < 64
7142
883k
        if (d == 3) {
7143
            /* Fast implementation for divisor of 3. */
7144
12
            _sp_div_3(a, r, rem);
7145
12
        }
7146
883k
        else
7147
883k
    #endif
7148
883k
        if (d == 10) {
7149
            /* Fast implementation for divisor of 10 - sp_todecimal(). */
7150
883k
            _sp_div_10(a, r, rem);
7151
883k
        }
7152
33
        else
7153
33
#endif
7154
33
        if (d <= SP_HALF_MAX) {
7155
            /* For small divisors. */
7156
23
            _sp_div_small(a, d, r, rem);
7157
23
        }
7158
10
        else
7159
10
        {
7160
10
            _sp_div_d(a, d, r, rem);
7161
10
        }
7162
7163
883k
    #ifdef WOLFSSL_SP_INT_NEGATIVE
7164
883k
        if (r != NULL) {
7165
883k
            r->sign = a->sign;
7166
883k
        }
7167
883k
    #endif
7168
883k
    }
7169
7170
883k
    return err;
7171
883k
}
7172
#endif /* WOLFSSL_SP_DIV_D */
7173
7174
#ifdef WOLFSSL_SP_MOD_D
7175
/* Calculate a modulo the digit d into r: r = a mod d
7176
 *
7177
 * @param  [in]   a  SP integer to reduce.
7178
 * @param  [in]   d  Digit to that is the modulus.
7179
 * @param  [out]  r  Digit that is the result.
7180
 */
7181
static void _sp_mod_d(const sp_int* a, const sp_int_digit d, sp_int_digit* r)
7182
365k
{
7183
365k
    int i;
7184
365k
#ifndef SQR_MUL_ASM
7185
365k
    sp_int_word w = 0;
7186
#else
7187
    sp_int_digit h = 0;
7188
#endif
7189
7190
    /* Divide starting at most significant word down to least. */
7191
5.45M
    for (i = (int)(a->used - 1); i >= 0; i--) {
7192
5.08M
    #ifndef SQR_MUL_ASM
7193
        /* Combine remainder from last operation with this word and divide. */
7194
5.08M
        sp_int_digit t = sp_div_word((sp_int_digit)w, a->dp[i], d);
7195
        /* Combine remainder from last operation with this word. */
7196
5.08M
        w = (w << SP_WORD_SIZE) | a->dp[i];
7197
        /* Subtract to get modulo result. */
7198
5.08M
        w -= (sp_int_word)t * d;
7199
    #else
7200
        /* Combine remainder from last operation with this word and divide. */
7201
        sp_int_digit t = sp_div_word(h, a->dp[i], d);
7202
        /* Subtract to get modulo result. */
7203
        h = a->dp[i] - t * d;
7204
    #endif
7205
5.08M
    }
7206
7207
    /* Return remainder. */
7208
365k
#ifndef SQR_MUL_ASM
7209
365k
    *r = (sp_int_digit)w;
7210
#else
7211
    *r = h;
7212
#endif
7213
365k
}
7214
7215
/* Calculate a modulo the digit d into r: r = a mod d
7216
 *
7217
 * @param  [in]   a  SP integer to reduce.
7218
 * @param  [in]   d  Digit to that is the modulus.
7219
 * @param  [out]  r  Digit that is the result.
7220
 *
7221
 * @return  MP_OKAY on success.
7222
 * @return  MP_VAL when a is NULL or d is 0.
7223
 */
7224
#if !defined(WOLFSSL_SP_MATH_ALL) && (!defined(HAVE_ECC) || \
7225
    !defined(HAVE_COMP_KEY)) && !defined(OPENSSL_EXTRA)
7226
static
7227
#endif /* !WOLFSSL_SP_MATH_ALL && (!HAVE_ECC || !HAVE_COMP_KEY) */
7228
int sp_mod_d(const sp_int* a, sp_int_digit d, sp_int_digit* r)
7229
125k
{
7230
125k
    int err = MP_OKAY;
7231
7232
    /* Validate parameters. */
7233
125k
    if ((a == NULL) || (r == NULL) || (d == 0)) {
7234
7
        err = MP_VAL;
7235
7
    }
7236
7237
#if 0
7238
    sp_print(a, "a");
7239
    sp_print_digit(d, "m");
7240
#endif
7241
7242
125k
    if (err == MP_OKAY) {
7243
        /* Check whether d is a power of 2. */
7244
125k
        if ((d & (d - 1)) == 0) {
7245
7.19k
            if (a->used == 0) {
7246
3
                *r = 0;
7247
3
            }
7248
7.18k
            else {
7249
7.18k
                *r = a->dp[0] & (d - 1);
7250
7.18k
            }
7251
7.19k
        }
7252
117k
#if !defined(WOLFSSL_SP_SMALL)
7253
117k
    #if SP_WORD_SIZE < 64
7254
117k
        else if (d == 3) {
7255
            /* Fast implementation for divisor of 3. */
7256
7.03k
            _sp_div_3(a, NULL, r);
7257
7.03k
        }
7258
110k
    #endif
7259
110k
        else if (d == 10) {
7260
            /* Fast implementation for divisor of 10. */
7261
18
            _sp_div_10(a, NULL, r);
7262
18
        }
7263
110k
#endif
7264
110k
        else if (d <= SP_HALF_MAX) {
7265
            /* For small divisors. */
7266
15.2k
            _sp_div_small(a, d, NULL, r);
7267
15.2k
        }
7268
95.5k
        else {
7269
95.5k
            _sp_mod_d(a, d, r);
7270
95.5k
        }
7271
7272
125k
    #ifdef WOLFSSL_SP_INT_NEGATIVE
7273
125k
        if (a->sign == MP_NEG) {
7274
0
            *r = d - *r;
7275
0
        }
7276
125k
    #endif
7277
125k
    }
7278
7279
#if 0
7280
    sp_print_digit(*r, "rmod");
7281
#endif
7282
7283
125k
    return err;
7284
125k
}
7285
#endif /* WOLFSSL_SP_MOD_D */
7286
7287
#if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
7288
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
7289
     !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_SP_INVMOD)
7290
/* Divides a by 2 and stores in r: r = a >> 1
7291
 *
7292
 * @param  [in]   a  SP integer to divide.
7293
 * @param  [out]  r  SP integer to hold result.
7294
 */
7295
static void _sp_div_2(const sp_int* a, sp_int* r)
7296
26.8M
{
7297
26.8M
    int i;
7298
7299
    /* Shift down each word by 1 and include bottom bit of next at top. */
7300
221M
    for (i = 0; i < (int)a->used - 1; i++) {
7301
194M
        r->dp[i]  = a->dp[i] >> 1;
7302
194M
        r->dp[i] |= a->dp[i+1] << (SP_WORD_SIZE - 1);
7303
194M
    }
7304
    /* Last word only needs to be shifted down. */
7305
26.8M
    r->dp[i] = a->dp[i] >> 1;
7306
    /* Set used to be all words seen. */
7307
26.8M
    r->used = (sp_size_t)(i + 1);
7308
    /* Remove leading zeros. */
7309
26.8M
    sp_clamp(r);
7310
26.8M
#ifdef WOLFSSL_SP_INT_NEGATIVE
7311
    /* Same sign in result. */
7312
26.8M
    r->sign = a->sign;
7313
26.8M
#endif
7314
26.8M
}
7315
7316
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
7317
/* Divides a by 2 and stores in r: r = a >> 1
7318
 *
7319
 * @param  [in]   a  SP integer to divide.
7320
 * @param  [out]  r  SP integer to hold result.
7321
 *
7322
 * @return  MP_OKAY on success.
7323
 * @return  MP_VAL when a or r is NULL.
7324
 */
7325
int sp_div_2(const sp_int* a, sp_int* r)
7326
46.4k
{
7327
46.4k
    int err = MP_OKAY;
7328
7329
    /* Only when a public API. */
7330
46.4k
    if ((a == NULL) || (r == NULL)) {
7331
0
        err = MP_VAL;
7332
0
    }
7333
    /* Ensure maximal size is supported by result. */
7334
46.4k
    if ((err == MP_OKAY) && (a->used > r->size)) {
7335
3
        err = MP_VAL;
7336
3
    }
7337
7338
46.4k
    if (err == MP_OKAY) {
7339
46.4k
        _sp_div_2(a, r);
7340
46.4k
    }
7341
7342
46.4k
    return err;
7343
46.4k
}
7344
#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
7345
#endif /* HAVE_ECC || !NO_DSA || OPENSSL_EXTRA ||
7346
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
7347
7348
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
7349
/* Divides a by 2 mod m and stores in r: r = (a / 2) mod m
7350
 *
7351
 * r = a / 2 (mod m) - constant time (a < m and positive)
7352
 *
7353
 * @param  [in]   a  SP integer to divide.
7354
 * @param  [in]   m  SP integer that is modulus.
7355
 * @param  [out]  r  SP integer to hold result.
7356
 *
7357
 * @return  MP_OKAY on success.
7358
 * @return  MP_VAL when a, m or r is NULL.
7359
 */
7360
int sp_div_2_mod_ct(const sp_int* a, const sp_int* m, sp_int* r)
7361
14.0M
{
7362
14.0M
    int err = MP_OKAY;
7363
7364
    /* Validate parameters. */
7365
14.0M
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
7366
0
        err = MP_VAL;
7367
0
    }
7368
    /* Check result has enough space for a + m. */
7369
14.0M
    if ((err == MP_OKAY) && (m->used + 1 > r->size)) {
7370
15
        err = MP_VAL;
7371
15
    }
7372
7373
14.0M
    if (err == MP_OKAY) {
7374
14.0M
    #ifndef SQR_MUL_ASM
7375
14.0M
        sp_int_word  w = 0;
7376
    #else
7377
        sp_int_digit l = 0;
7378
        sp_int_digit h;
7379
        sp_int_digit t;
7380
    #endif
7381
        /* Mask to apply to modulus. */
7382
14.0M
        volatile sp_int_digit mask = (sp_int_digit)0 - (a->dp[0] & 1);
7383
14.0M
        sp_size_t i;
7384
7385
    #if 0
7386
        sp_print(a, "a");
7387
        sp_print(m, "m");
7388
    #endif
7389
7390
        /* Add a to m, if a is odd, into r in constant time. */
7391
126M
        for (i = 0; i < m->used; i++) {
7392
            /* Mask to apply to a - set when used value at index. */
7393
112M
            volatile sp_int_digit mask_a = (sp_int_digit)0 - (i < a->used);
7394
7395
112M
        #ifndef SQR_MUL_ASM
7396
            /* Conditionally add modulus. */
7397
112M
            w         += m->dp[i] & mask;
7398
            /* Conditionally add a. */
7399
112M
            w         += a->dp[i] & mask_a;
7400
            /* Store low digit in result. */
7401
112M
            r->dp[i]   = (sp_int_digit)w;
7402
            /* Move high digit down. */
7403
112M
            w        >>= DIGIT_BIT;
7404
        #else
7405
            /* No high digit. */
7406
            h        = 0;
7407
            /* Conditionally use modulus. */
7408
            t        = m->dp[i] & mask;
7409
            /* Add with carry modulus. */
7410
            SP_ASM_ADDC_REG(l, h, t);
7411
            /* Conditionally use a. */
7412
            t        = a->dp[i] & mask_a;
7413
            /* Add with carry a. */
7414
            SP_ASM_ADDC_REG(l, h, t);
7415
            /* Store low digit in result. */
7416
            r->dp[i] = l;
7417
            /* Move high digit down. */
7418
            l        = h;
7419
        #endif
7420
112M
        }
7421
        /* Store carry. */
7422
14.0M
    #ifndef SQR_MUL_ASM
7423
14.0M
        r->dp[i] = (sp_int_digit)w;
7424
    #else
7425
        r->dp[i] = l;
7426
    #endif
7427
        /* Used includes carry - set or not. */
7428
14.0M
        r->used = (sp_size_t)(i + 1);
7429
14.0M
    #ifdef WOLFSSL_SP_INT_NEGATIVE
7430
14.0M
        r->sign = MP_ZPOS;
7431
14.0M
    #endif
7432
        /* Divide conditional sum by 2. */
7433
14.0M
        _sp_div_2(r, r);
7434
7435
    #if 0
7436
        sp_print(r, "rd2");
7437
    #endif
7438
14.0M
    }
7439
7440
14.0M
    return err;
7441
14.0M
}
7442
#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
7443
7444
/************************
7445
 * Add/Subtract Functions
7446
 ************************/
7447
7448
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
7449
/* Add offset b to a into r: r = a + (b << (o * SP_WORD_SIZEOF))
7450
 *
7451
 * @param  [in]   a  SP integer to add to.
7452
 * @param  [in]   b  SP integer to add.
7453
 * @param  [out]  r  SP integer to store result in.
7454
 * @param  [in]   o  Number of digits to offset b.
7455
 */
7456
static void _sp_add_off(const sp_int* a, const sp_int* b, sp_int* r, int o)
7457
15.9M
{
7458
15.9M
    sp_size_t i = 0;
7459
15.9M
#ifndef SQR_MUL_ASM
7460
15.9M
    sp_int_word t = 0;
7461
#else
7462
    sp_int_digit l = 0;
7463
    sp_int_digit h = 0;
7464
    sp_int_digit t = 0;
7465
#endif
7466
7467
#ifdef SP_MATH_NEED_ADD_OFF
7468
    unsigned int j;
7469
7470
    /* Copy a into result up to offset. */
7471
    for (; (i < o) && (i < a->used); i++) {
7472
        r->dp[i] = a->dp[i];
7473
    }
7474
    /* Set result to 0 for digits beyonf those in a. */
7475
    for (; i < o; i++) {
7476
        r->dp[i] = 0;
7477
    }
7478
7479
    /* Add each digit from a and b where both have values. */
7480
    for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
7481
    #ifndef SQR_MUL_ASM
7482
        t += a->dp[i];
7483
        t += b->dp[j];
7484
        r->dp[i] = (sp_int_digit)t;
7485
        t >>= SP_WORD_SIZE;
7486
    #else
7487
        t = a->dp[i];
7488
        SP_ASM_ADDC(l, h, t);
7489
        t = b->dp[j];
7490
        SP_ASM_ADDC(l, h, t);
7491
        r->dp[i] = l;
7492
        l = h;
7493
        h = 0;
7494
    #endif
7495
    }
7496
    /* Either a and/or b are out of digits. Add carry and remaining a digits. */
7497
    for (; i < a->used; i++) {
7498
    #ifndef SQR_MUL_ASM
7499
        t += a->dp[i];
7500
        r->dp[i] = (sp_int_digit)t;
7501
        t >>= SP_WORD_SIZE;
7502
    #else
7503
        t = a->dp[i];
7504
        SP_ASM_ADDC(l, h, t);
7505
        r->dp[i] = l;
7506
        l = h;
7507
        h = 0;
7508
    #endif
7509
    }
7510
    /* a is out of digits. Add carry and remaining b digits. */
7511
    for (; j < b->used; i++, j++) {
7512
    #ifndef SQR_MUL_ASM
7513
        t += b->dp[j];
7514
        r->dp[i] = (sp_int_digit)t;
7515
        t >>= SP_WORD_SIZE;
7516
    #else
7517
        t = b->dp[j];
7518
        SP_ASM_ADDC(l, h, t);
7519
        r->dp[i] = l;
7520
        l = h;
7521
        h = 0;
7522
    #endif
7523
    }
7524
#else
7525
15.9M
    (void)o;
7526
7527
    /* Add each digit from a and b where both have values. */
7528
231M
    for (; (i < a->used) && (i < b->used); i++) {
7529
215M
    #ifndef SQR_MUL_ASM
7530
215M
        t += a->dp[i];
7531
215M
        t += b->dp[i];
7532
215M
        r->dp[i] = (sp_int_digit)t;
7533
215M
        t >>= SP_WORD_SIZE;
7534
    #else
7535
        t = a->dp[i];
7536
        SP_ASM_ADDC(l, h, t);
7537
        t = b->dp[i];
7538
        SP_ASM_ADDC(l, h, t);
7539
        r->dp[i] = l;
7540
        l = h;
7541
        h = 0;
7542
    #endif
7543
215M
    }
7544
    /* Either a and/or b are out of digits. Add carry and remaining a digits. */
7545
16.1M
    for (; i < a->used; i++) {
7546
176k
    #ifndef SQR_MUL_ASM
7547
176k
        t += a->dp[i];
7548
176k
        r->dp[i] = (sp_int_digit)t;
7549
176k
        t >>= SP_WORD_SIZE;
7550
    #else
7551
        t = a->dp[i];
7552
        SP_ASM_ADDC(l, h, t);
7553
        r->dp[i] = l;
7554
        l = h;
7555
        h = 0;
7556
    #endif
7557
176k
    }
7558
    /* a is out of digits. Add carry and remaining b digits. */
7559
22.7M
    for (; i < b->used; i++) {
7560
6.74M
    #ifndef SQR_MUL_ASM
7561
6.74M
        t += b->dp[i];
7562
6.74M
        r->dp[i] = (sp_int_digit)t;
7563
6.74M
        t >>= SP_WORD_SIZE;
7564
    #else
7565
        t = b->dp[i];
7566
        SP_ASM_ADDC(l, h, t);
7567
        r->dp[i] = l;
7568
        l = h;
7569
        h = 0;
7570
    #endif
7571
6.74M
    }
7572
15.9M
#endif
7573
7574
    /* Set used based on last digit put in. */
7575
15.9M
    r->used = i;
7576
    /* Put in carry. */
7577
15.9M
#ifndef SQR_MUL_ASM
7578
15.9M
    r->dp[i] = (sp_int_digit)t;
7579
15.9M
    r->used = (sp_size_t)(r->used + (sp_size_t)(t != 0));
7580
#else
7581
    r->dp[i] = l;
7582
    r->used = (sp_size_t)(r->used + (sp_size_t)(l != 0));
7583
#endif
7584
7585
    /* Remove leading zeros. */
7586
15.9M
    sp_clamp(r);
7587
15.9M
}
7588
#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
7589
7590
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_SP_INT_NEGATIVE) || \
7591
    !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
7592
    !defined(WOLFSSL_RSA_VERIFY_ONLY))
7593
/* Sub offset b from a into r: r = a - (b << (o * SP_WORD_SIZEOF))
7594
 * a must be greater than b.
7595
 *
7596
 * When using offset, r == a is faster.
7597
 *
7598
 * @param  [in]   a  SP integer to subtract from.
7599
 * @param  [in]   b  SP integer to subtract.
7600
 * @param  [out]  r  SP integer to store result in.
7601
 * @param  [in]   o  Number of digits to offset b.
7602
 */
7603
static void _sp_sub_off(const sp_int* a, const sp_int* b, sp_int* r,
7604
    sp_size_t o)
7605
99.8M
{
7606
99.8M
    sp_size_t i = 0;
7607
99.8M
    sp_size_t j;
7608
99.8M
#ifndef SQR_MUL_ASM
7609
99.8M
    sp_int_sword t = 0;
7610
#else
7611
    sp_int_digit l = 0;
7612
    sp_int_digit h = 0;
7613
#endif
7614
7615
    /* Need to copy digits up to offset into result. */
7616
99.8M
    if (r != a) {
7617
6.90M
        for (; (i < o) && (i < a->used); i++) {
7618
0
            r->dp[i] = a->dp[i];
7619
0
        }
7620
6.90M
    }
7621
92.9M
    else {
7622
92.9M
        i = o;
7623
92.9M
    }
7624
    /* Index to add at is the offset now. */
7625
7626
903M
    for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
7627
803M
    #ifndef SQR_MUL_ASM
7628
        /* Add a into and subtract b from current value. */
7629
803M
        t += a->dp[i];
7630
803M
        t -= b->dp[j];
7631
        /* Store low digit in result. */
7632
803M
        r->dp[i] = (sp_int_digit)t;
7633
        /* Move high digit down. */
7634
803M
        t >>= SP_WORD_SIZE;
7635
    #else
7636
        /* Add a into and subtract b from current value. */
7637
        SP_ASM_ADDC(l, h, a->dp[i]);
7638
        SP_ASM_SUBB(l, h, b->dp[j]);
7639
        /* Store low digit in result. */
7640
        r->dp[i] = l;
7641
        /* Move high digit down. */
7642
        l = h;
7643
        /* High digit is 0 when positive or -1 on negative. */
7644
        h = (sp_int_digit)0 - (h >> (SP_WORD_SIZE - 1));
7645
    #endif
7646
803M
    }
7647
148M
    for (; i < a->used; i++) {
7648
49.0M
    #ifndef SQR_MUL_ASM
7649
        /* Add a into current value. */
7650
49.0M
        t += a->dp[i];
7651
        /* Store low digit in result. */
7652
49.0M
        r->dp[i] = (sp_int_digit)t;
7653
        /* Move high digit down. */
7654
49.0M
        t >>= SP_WORD_SIZE;
7655
    #else
7656
        /* Add a into current value. */
7657
        SP_ASM_ADDC(l, h, a->dp[i]);
7658
        /* Store low digit in result. */
7659
        r->dp[i] = l;
7660
        /* Move high digit down. */
7661
        l = h;
7662
        /* High digit is 0 when positive or -1 on negative. */
7663
        h = (sp_int_digit)0 - (h >> (SP_WORD_SIZE - 1));
7664
    #endif
7665
49.0M
    }
7666
7667
    /* Set used based on last digit put in. */
7668
99.8M
    r->used = i;
7669
    /* Remove leading zeros. */
7670
99.8M
    sp_clamp(r);
7671
99.8M
}
7672
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_SP_INT_NEGATIVE || !NO_DH ||
7673
        * HAVE_ECC || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
7674
7675
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
7676
/* Add b to a into r: r = a + b
7677
 *
7678
 * @param  [in]   a  SP integer to add to.
7679
 * @param  [in]   b  SP integer to add.
7680
 * @param  [out]  r  SP integer to store result in.
7681
 *
7682
 * @return  MP_OKAY on success.
7683
 * @return  MP_VAL when a, b, or r is NULL.
7684
 */
7685
int sp_add(const sp_int* a, const sp_int* b, sp_int* r)
7686
259k
{
7687
259k
    int err = MP_OKAY;
7688
7689
    /* Validate parameters. */
7690
259k
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
7691
0
        err = MP_VAL;
7692
0
    }
7693
    /* Check that r as big as a and b plus one word. */
7694
259k
    if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
7695
38
        err = MP_VAL;
7696
38
    }
7697
7698
259k
    if (err == MP_OKAY) {
7699
    #ifndef WOLFSSL_SP_INT_NEGATIVE
7700
        /* Add two positive numbers. */
7701
        _sp_add_off(a, b, r, 0);
7702
    #else
7703
        /* Same sign then add absolute values and use sign. */
7704
259k
        if (a->sign == b->sign) {
7705
238k
            _sp_add_off(a, b, r, 0);
7706
238k
            r->sign = a->sign;
7707
238k
        }
7708
        /* Different sign and abs(a) >= abs(b). */
7709
20.7k
        else if (_sp_cmp_abs(a, b) != MP_LT) {
7710
            /* Subtract absolute values and use sign of a unless result 0. */
7711
220
            _sp_sub_off(a, b, r, 0);
7712
220
            if (sp_iszero(r)) {
7713
18
                r->sign = MP_ZPOS;
7714
18
            }
7715
202
            else {
7716
202
                r->sign = a->sign;
7717
202
            }
7718
220
        }
7719
        /* Different sign and abs(a) < abs(b). */
7720
20.5k
        else {
7721
            /* Reverse subtract absolute values and use sign of b. */
7722
20.5k
            _sp_sub_off(b, a, r, 0);
7723
20.5k
            r->sign = b->sign;
7724
20.5k
        }
7725
259k
    #endif
7726
259k
    }
7727
7728
259k
    return err;
7729
259k
}
7730
#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
7731
7732
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
7733
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
7734
/* Subtract b from a into r: r = a - b
7735
 *
7736
 * a must be greater than b unless WOLFSSL_SP_INT_NEGATIVE is defined.
7737
 *
7738
 * @param  [in]   a  SP integer to subtract from.
7739
 * @param  [in]   b  SP integer to subtract.
7740
 * @param  [out]  r  SP integer to store result in.
7741
 *
7742
 * @return  MP_OKAY on success.
7743
 * @return  MP_VAL when a, b, or r is NULL.
7744
 */
7745
int sp_sub(const sp_int* a, const sp_int* b, sp_int* r)
7746
16.4M
{
7747
16.4M
    int err = MP_OKAY;
7748
7749
    /* Validate parameters. */
7750
16.4M
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
7751
0
        err = MP_VAL;
7752
0
    }
7753
    /* Check that r as big as a and b plus one word. */
7754
16.4M
    if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
7755
52
        err = MP_VAL;
7756
52
    }
7757
7758
16.4M
    if (err == MP_OKAY) {
7759
    #ifndef WOLFSSL_SP_INT_NEGATIVE
7760
        /* Subtract positive numbers b from a. */
7761
        _sp_sub_off(a, b, r, 0);
7762
    #else
7763
        /* Different sign. */
7764
16.4M
        if (a->sign != b->sign) {
7765
            /* Add absolute values and use sign of a. */
7766
10.0M
            _sp_add_off(a, b, r, 0);
7767
10.0M
            r->sign = a->sign;
7768
10.0M
        }
7769
        /* Same sign and abs(a) >= abs(b). */
7770
6.36M
        else if (_sp_cmp_abs(a, b) != MP_LT) {
7771
            /* Subtract absolute values and use sign of a unless result 0. */
7772
6.34M
            _sp_sub_off(a, b, r, 0);
7773
6.34M
            if (sp_iszero(r)) {
7774
535
                r->sign = MP_ZPOS;
7775
535
            }
7776
6.34M
            else {
7777
6.34M
                r->sign = a->sign;
7778
6.34M
            }
7779
6.34M
        }
7780
        /* Same sign and abs(a) < abs(b). */
7781
16.1k
        else {
7782
            /* Reverse subtract absolute values and use opposite sign of a */
7783
16.1k
            _sp_sub_off(b, a, r, 0);
7784
16.1k
            r->sign = 1 - a->sign;
7785
16.1k
        }
7786
16.4M
    #endif
7787
16.4M
    }
7788
7789
16.4M
    return err;
7790
16.4M
}
7791
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
7792
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY)*/
7793
7794
/****************************
7795
 * Add/Subtract mod functions
7796
 ****************************/
7797
7798
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
7799
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_CUSTOM_CURVES)) || \
7800
    defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE)
7801
/* Add two value and reduce: r = (a + b) % m
7802
 *
7803
 * @param  [in]   a  SP integer to add.
7804
 * @param  [in]   b  SP integer to add with.
7805
 * @param  [in]   m  SP integer that is the modulus.
7806
 * @param  [out]  r  SP integer to hold result.
7807
 *
7808
 * @return  MP_OKAY on success.
7809
 * @return  MP_MEM when dynamic memory allocation fails.
7810
 */
7811
static int _sp_addmod(const sp_int* a, const sp_int* b, const sp_int* m,
7812
    sp_int* r)
7813
5.30k
{
7814
5.30k
    int err = MP_OKAY;
7815
    /* Calculate used based on digits used in a and b. */
7816
5.30k
    sp_size_t used = (sp_size_t)(((a->used >= b->used) ? a->used + 1U : b->used + 1U));
7817
5.30k
    DECL_SP_INT(t, used);
7818
7819
    /* Allocate a temporary SP int to hold sum. */
7820
5.30k
    ALLOC_SP_INT_SIZE(t, used, err, NULL);
7821
7822
5.30k
    if (err == MP_OKAY) {
7823
        /* Do sum. */
7824
5.28k
        err = sp_add(a, b, t);
7825
5.28k
    }
7826
5.30k
    if (err == MP_OKAY) {
7827
        /* Mod result. */
7828
5.28k
        err = sp_mod(t, m, r);
7829
5.28k
    }
7830
7831
5.30k
    FREE_SP_INT(t, NULL);
7832
5.30k
    return err;
7833
5.30k
}
7834
7835
/* Add two value and reduce: r = (a + b) % m
7836
 *
7837
 * @param  [in]   a  SP integer to add.
7838
 * @param  [in]   b  SP integer to add with.
7839
 * @param  [in]   m  SP integer that is the modulus.
7840
 * @param  [out]  r  SP integer to hold result.
7841
 *
7842
 * @return  MP_OKAY on success.
7843
 * @return  MP_VAL when a, b, m or r is NULL.
7844
 * @return  MP_MEM when dynamic memory allocation fails.
7845
 */
7846
int sp_addmod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
7847
5.31k
{
7848
5.31k
    int err = MP_OKAY;
7849
7850
    /* All four pointers are required. */
7851
5.31k
    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
7852
0
        err = MP_VAL;
7853
0
    }
7854
    /* Reject a or b using SP_INT_DIGITS or more digits: _sp_addmod() sizes
     * its temporary at one digit more than the longer operand. */
7855
5.31k
    else if (a->used >= SP_INT_DIGITS) {
7856
6
        err = MP_VAL;
7857
6
    }
7858
5.30k
    else if (b->used >= SP_INT_DIGITS) {
7859
6
        err = MP_VAL;
7860
6
    }
7861
7862
7863
#if 0
7864
    if (err == MP_OKAY) {
7865
        sp_print(a, "a");
7866
        sp_print(b, "b");
7867
        sp_print(m, "m");
7868
    }
7869
#endif
7870
5.31k
    if (err == MP_OKAY) {
7871
        /* Do add and modular reduction; its error code is returned as is. */
7872
5.30k
        err = _sp_addmod(a, b, m, r);
7873
5.30k
    }
7874
#if 0
7875
    if (err == MP_OKAY) {
7876
        sp_print(r, "rma");
7877
    }
7878
#endif
7879
7880
5.31k
    return err;
7881
5.31k
}
7882
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_CUSTOM_CURVES ||
7883
        * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
7884
7885
#if defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
7886
    defined(HAVE_ECC))
7887
/* Sub b from a and reduce: r = (a - b) % m
7888
 * Result is always positive.
7889
 *
7890
 * @param  [in]   a  SP integer to subtract from
7891
 * @param  [in]   b  SP integer to subtract.
7892
 * @param  [in]   m  SP integer that is the modulus.
7893
 * @param  [out]  r  SP integer to hold result.
7894
 *
7895
 * @return  MP_OKAY on success.
7896
 * @return  MP_MEM when dynamic memory allocation fails.
7897
 */
7898
static int _sp_submod(const sp_int* a, const sp_int* b, const sp_int* m,
7899
    sp_int* r)
7900
5.53M
{
7901
5.53M
    int err = MP_OKAY;
7902
#ifndef WOLFSSL_SP_INT_NEGATIVE
    /* Temporaries get one digit more than the longest of a, b and m. */
7903
    unsigned int used = ((a->used >= m->used) ?
7904
        ((a->used >= b->used) ? (a->used + 1U) : (b->used + 1U)) :
7905
        ((b->used >= m->used)) ? (b->used + 1U) : (m->used + 1U));
7906
    DECL_SP_INT(t0, used);
7907
    DECL_SP_INT(t1, used);
7908
7909
    ALLOC_SP_INT_SIZE(t0, used, err, NULL);
7910
    ALLOC_SP_INT_SIZE(t1, used, err, NULL);
7911
    if (err == MP_OKAY) {
7912
        /* Reduce a to less than m; a then points at the reduced copy in t0. */
7913
        if (_sp_cmp(a, m) != MP_LT) {
7914
            err = sp_mod(a, m, t0);
7915
            a = t0;
7916
        }
7917
    }
7918
    if (err == MP_OKAY) {
7919
        /* Reduce b to less than m; b then points at the reduced copy in t1. */
7920
        if (_sp_cmp(b, m) != MP_LT) {
7921
            err = sp_mod(b, m, t1);
7922
            b = t1;
7923
        }
7924
    }
7925
    if (err == MP_OKAY) {
7926
        /* When a < b, use a + m instead so that the subtraction below does
         * not go below zero. The sum is written to t0 (a may already be t0). */
7927
        if (_sp_cmp(a, b) == MP_LT) {
7928
            err = sp_add(a, m, t0);
7929
            a = t0;
7930
        }
7931
    }
7932
    if (err == MP_OKAY) {
7933
        /* r = a - b, using the possibly substituted a and b. */
7934
        err = sp_sub(a, b, r);
7935
    }
7936
7937
    FREE_SP_INT(t0, NULL);
7938
    FREE_SP_INT(t1, NULL);
7939
#else /* WOLFSSL_SP_INT_NEGATIVE */
7940
5.53M
    sp_size_t used = ((a->used >= b->used) ? a->used + 1 : b->used + 1);
7941
5.53M
    DECL_SP_INT(t, used);
7942
7943
5.53M
    ALLOC_SP_INT_SIZE(t, used, err, NULL);
7944
    /* t = a - b; no pre-reduction of a or b is done in this variant. */
7945
5.53M
    if (err == MP_OKAY) {
7946
5.53M
        err = sp_sub(a, b, t);
7947
5.53M
    }
7948
5.53M
    if (err == MP_OKAY) {
7949
        /* r = t mod m */
7950
5.53M
        err = sp_mod(t, m, r);
7951
5.53M
    }
7952
5.53M
    FREE_SP_INT(t, NULL);
7953
5.53M
#endif /* WOLFSSL_SP_INT_NEGATIVE */
7954
7955
5.53M
    return err;
7956
5.53M
}
7957
7958
/* Sub b from a and reduce: r = (a - b) % m
7959
 * Result is always positive.
7960
 *
7961
 * @param  [in]   a  SP integer to subtract from
7962
 * @param  [in]   b  SP integer to subtract.
7963
 * @param  [in]   m  SP integer that is the modulus.
7964
 * @param  [out]  r  SP integer to hold result.
7965
 *
7966
 * @return  MP_OKAY on success.
7967
 * @return  MP_VAL when a, b, m or r is NULL, or a, b or m has too many
 *          digits.
7968
 * @return  MP_MEM when dynamic memory allocation fails.
7969
 */
7970
int sp_submod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
7971
6.81M
{
7972
6.81M
    int err = MP_OKAY;
7973
    /* All four pointers are required. */
7974
6.81M
    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
7975
0
        err = MP_VAL;
7976
0
    }
7977
    /* Reject a, b or m using SP_INT_DIGITS or more digits: _sp_submod()
     * sizes its temporaries at one digit more than the longest operand. */
7978
6.81M
    else if (a->used >= SP_INT_DIGITS) {
7979
6
        err = MP_VAL;
7980
6
    }
7981
6.81M
    else if (b->used >= SP_INT_DIGITS) {
7982
6
        err = MP_VAL;
7983
6
    }
7984
6.81M
    else if (m->used >= SP_INT_DIGITS) {
7985
6
        err = MP_VAL;
7986
6
    }
7987
7988
#if 0
7989
    if (err == MP_OKAY) {
7990
        sp_print(a, "a");
7991
        sp_print(b, "b");
7992
        sp_print(m, "m");
7993
    }
7994
#endif
7995
6.81M
    if (err == MP_OKAY) {
7996
        /* Do submod; its error code is returned as is. */
7997
6.81M
        err = _sp_submod(a, b, m, r);
7998
6.81M
    }
7999
#if 0
8000
    if (err == MP_OKAY) {
8001
        sp_print(r, "rms");
8002
    }
8003
#endif
8004
8005
6.81M
    return err;
8006
6.81M
}
8007
#endif /* WOLFSSL_SP_MATH_ALL && (!WOLFSSL_RSA_VERIFY_ONLY || HAVE_ECC) */
8008
8009
/* Constant time clamping.
8010
 *
8011
 * @param [in, out] a  SP integer to clamp.
8012
 */
8013
static void sp_clamp_ct(sp_int* a)
8014
144M
{
8015
144M
    int i;
8016
144M
    sp_size_t used = a->used;
    /* All ones until the first non-zero digit is met, zero from then on.
     * NOTE(review): volatile is presumably there to stop the compiler from
     * turning the mask arithmetic into a branch - confirm. */
8017
144M
    volatile sp_size_t mask = (sp_size_t)-1;
8018
8019
    /* Walk every digit from the top down; the loop has no early exit, so the
     * iteration count depends only on a->used. */
1.30G
    for (i = (int)a->used - 1; i >= 0; i--) {
8020
#if ((SP_WORD_SIZE == 64) && \
8021
     (defined(_WIN64) || !defined(WOLFSSL_UINT128_T_DEFINED))) || \
8022
    ((SP_WORD_SIZE == 32) && defined(NO_64BIT))
        /* Digit-only variant: (~d & (d - 1)) has its top bit set only when
         * d is 0; the signed shift spreads that bit across the digit. */
8023
        sp_int_digit negVal = ~a->dp[i];
8024
        sp_int_digit minusOne = a->dp[i] - 1;
8025
        sp_int_digit zeroMask =
8026
            (sp_int_digit)((sp_int_sdigit)(negVal & minusOne) >>
8027
                           (SP_WORD_SIZE - 1));
8028
#else
        /* Double-width variant: (d - 1) as a signed word is negative only
         * when d is 0, so the shifted value is all ones for a zero digit and
         * 0 otherwise. */
8029
1.15G
        sp_size_t zeroMask =
8030
1.15G
            (sp_size_t)((((sp_int_sword)a->dp[i]) - 1) >> SP_WORD_SIZE);
8031
1.15G
#endif
        /* mask stays all ones only while every digit so far was zero. */
8032
1.15G
        mask &= (sp_size_t)zeroMask;
        /* Adding an all-ones mask decrements used; adding zero keeps it. */
8033
1.15G
        used = (sp_size_t)(used + mask);
8034
1.15G
    }
8035
144M
    a->used = used;
8036
144M
}
8037
8038
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
8039
/* Add two values and reduce: r = (a + b) % m
8040
 *
8041
 * r = a + b (mod m) - constant time (a < m and b < m, a, b and m are positive)
8042
 *
8043
 * Assumes a, b, m and r are not NULL.
8044
 * m and r must not be the same pointer.
8045
 *
8046
 * @param  [in]   a  SP integer to add.
8047
 * @param  [in]   b  SP integer to add with.
8048
 * @param  [in]   m  SP integer that is the modulus.
8049
 * @param  [out]  r  SP integer to hold result.
8050
 *
8051
 * @return  MP_OKAY on success.
 * @return  MP_VAL when r is too small to hold m->used digits or r is m.
8052
 */
8053
int sp_addmod_ct(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
8054
34.3M
{
8055
34.3M
    int err = MP_OKAY;
8056
34.3M
#ifndef SQR_MUL_ASM
    /* w: running sum of a + b; s: running value of (a + b) - m. */
8057
34.3M
    sp_int_sword w;
8058
34.3M
    sp_int_sword s;
8059
#else
    /* wl/wh and sl/sh: low/high digit pairs playing the roles of w and s. */
8060
    sp_int_digit wl;
8061
    sp_int_digit wh;
8062
    sp_int_digit sl;
8063
    sp_int_digit sh;
8064
    sp_int_digit t;
8065
#endif
8066
34.3M
    volatile sp_int_digit mask;
8067
34.3M
    volatile sp_int_digit mask_a = (sp_int_digit)-1;
8068
34.3M
    volatile sp_int_digit mask_b = (sp_int_digit)-1;
8069
34.3M
    sp_size_t i;
8070
8071
    /* Result must have room for as many digits as the modulus uses. */
8072
34.3M
    if (m->used > r->size) {
8073
4
        err = MP_VAL;
8074
4
    }
8075
    /* Result must not be the same object as the modulus. */
8076
34.3M
    if ((err == MP_OKAY) && (r == m)) {
8077
1
        err = MP_VAL;
8078
1
    }
8079
8080
34.3M
    if (err == MP_OKAY) {
8081
#if 0
8082
        sp_print(a, "a");
8083
        sp_print(b, "b");
8084
        sp_print(m, "m");
8085
#endif
8086
8087
        /* Add a to b into r. Do the subtract of modulus but don't store result.
8088
         * When subtract result is negative, the overflow will be negative.
8089
         * Only need to subtract mod when result is positive - overflow is
8090
         * positive.
8091
         */
8092
34.3M
    #ifndef SQR_MUL_ASM
8093
34.3M
        w = 0;
8094
34.3M
        s = 0;
8095
    #else
8096
        wl = 0;
8097
        sl = 0;
8098
        sh = 0;
8099
    #endif
8100
        /* Constant time - always process m->used digits of a and b. */
8101
388M
        for (i = 0; i < m->used; i++) {
8102
            /* Digits past 'used' are not initialized: each mask wraps from
             * all ones to zero once i reaches that operand's used count, so
             * those digits contribute nothing. */
8103
354M
            mask_a += (i == a->used);
8104
354M
            mask_b += (i == b->used);
8105
8106
354M
        #ifndef SQR_MUL_ASM
8107
            /* Add next digits from a and b to current value. */
8108
354M
            w         += a->dp[i] & mask_a;
8109
354M
            w         += b->dp[i] & mask_b;
8110
            /* Store low digit of the unreduced sum in result. */
8111
354M
            r->dp[i]   = (sp_int_digit)w;
8112
            /* Add the same sum digit to the reducing value. */
8113
354M
            s         += (sp_int_digit)w;
8114
            /* Subtract next digit of modulus. */
8115
354M
            s         -= m->dp[i];
8116
            /* Keep only the borrow/carry of the reduced value. */
8117
354M
            s        >>= DIGIT_BIT;
8118
            /* Keep only the carry of the sum. */
8119
354M
            w        >>= DIGIT_BIT;
8120
        #else
8121
            wh = 0;
8122
            /* Add next digits from a and b to current value. */
8123
            t = a->dp[i] & mask_a;
8124
            SP_ASM_ADDC_REG(wl, wh, t);
8125
            t = b->dp[i] & mask_b;
8126
            SP_ASM_ADDC_REG(wl, wh, t);
8127
            /* Store low digit of the unreduced sum in result. */
8128
            r->dp[i] = wl;
8129
            /* Add the same sum digit to the reducing value. */
8130
            SP_ASM_ADDC_REG(sl, sh, wl);
8131
            /* Subtract next digit of modulus. */
8132
            SP_ASM_SUBB(sl, sh, m->dp[i]);
8133
            /* Move high digit of reduced result down. */
8134
            sl = sh;
8135
            /* High digit is 0 when positive or -1 on negative. */
8136
            sh = (sp_int_digit)0 - (sh >> (SP_WORD_SIZE-1));
8137
            /* Move high digit of sum result down. */
8138
            wl = wh;
8139
        #endif
8140
354M
        }
8141
34.3M
    #ifndef SQR_MUL_ASM
8142
        /* Add the final carry of the sum into the reduced result. */
8143
34.3M
        s += (sp_int_digit)w;
8144
        /* mask is all ones when s is not negative (modulus must be
         * subtracted) and zero otherwise. */
8145
34.3M
        mask = (sp_int_digit)0 - (s >= 0);
8146
    #else
8147
        /* Add the final carry of the sum into the reduced result. */
8148
        SP_ASM_ADDC_REG(sl, sh, wl);
8149
        /* mask is all ones when the top bit of sh is clear and zero when it
         * is set. */
8150
        mask = (sh >> (SP_WORD_SIZE-1)) - 1;
8151
    #endif
8152
8153
        /* Constant time, conditionally, subtract modulus from sum. */
8154
34.3M
    #ifndef SQR_MUL_ASM
8155
34.3M
        w = 0;
8156
    #else
8157
        wl = 0;
8158
        wh = 0;
8159
    #endif
8160
388M
        for (i = 0; i < m->used; i++) {
8161
354M
        #ifndef SQR_MUL_ASM
8162
            /* Add result to current value and conditionally subtract modulus.
8163
             */
8164
354M
            w         += r->dp[i];
8165
354M
            w         -= m->dp[i] & mask;
8166
            /* Store low digit in result. */
8167
354M
            r->dp[i]   = (sp_int_digit)w;
8168
            /* Keep only the borrow for the next digit. */
8169
354M
            w        >>= DIGIT_BIT;
8170
        #else
8171
            /* Add result to current value and conditionally subtract modulus.
8172
             */
8173
            SP_ASM_ADDC(wl, wh, r->dp[i]);
8174
            t = m->dp[i] & mask;
8175
            SP_ASM_SUBB_REG(wl, wh, t);
8176
            /* Store low digit in result. */
8177
            r->dp[i] = wl;
8178
            /* Move high digit of sum result down. */
8179
            wl = wh;
8180
            /* High digit is 0 when positive or -1 on negative. */
8181
            wh = (sp_int_digit)0 - (wl >> (SP_WORD_SIZE-1));
8182
        #endif
8183
354M
        }
8184
        /* Result will always have digits equal to or less than those in
8185
         * modulus. i equals m->used here. */
8186
34.3M
        r->used = i;
8187
34.3M
    #ifdef WOLFSSL_SP_INT_NEGATIVE
8188
34.3M
        r->sign = MP_ZPOS;
8189
34.3M
    #endif /* WOLFSSL_SP_INT_NEGATIVE */
8190
        /* Remove leading zeros without branching on the digit values. */
8191
34.3M
        sp_clamp_ct(r);
8192
8193
#if 0
8194
        sp_print(r, "rma");
8195
#endif
8196
34.3M
    }
8197
8198
34.3M
    return err;
8199
34.3M
}
8200
#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
8201
8202
#if (defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)) || \
8203
    (defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
8204
     defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
8205
     defined(OPENSSL_ALL))
8206
/* Sub b from a modulo m: r = (a - b) % m
8207
 *
8208
 * Result is always positive.
8209
 *
8210
 * Assumes a, b, m and r are not NULL.
8211
 * m and r must not be the same pointer.
8212
 *
8213
 * @param  [in]   a  SP integer to subtract from
8214
 * @param  [in]   b  SP integer to subtract.
8215
 * @param  [in]   m  SP integer that is the modulus.
 * @param  [in]   max_size  Number of low digits of a and b to subtract.
8216
 * @param  [out]  r  SP integer to hold result.
8217
 *
8218
 * No value is returned; r->used is set from m->used and then clamped.
8219
 */
8220
static void _sp_submod_ct(const sp_int* a, const sp_int* b, const sp_int* m,
8221
    unsigned int max_size, sp_int* r)
8222
45.2M
{
8223
45.2M
#ifndef SQR_MUL_ASM
    /* w: running signed value of a - b, later of r + (m & mask). */
8224
45.2M
    sp_int_sword w;
8225
#else
    /* l/h: low/high digit pair playing the role of w. */
8226
    sp_int_digit l;
8227
    sp_int_digit h;
8228
    sp_int_digit t;
8229
#endif
8230
45.2M
    volatile sp_int_digit mask;
8231
45.2M
    volatile sp_int_digit mask_a = (sp_int_digit)-1;
8232
45.2M
    volatile sp_int_digit mask_b = (sp_int_digit)-1;
8233
45.2M
    unsigned int i;
8234
8235
    /* In constant time, subtract b from a putting result in r. */
8236
45.2M
#ifndef SQR_MUL_ASM
8237
45.2M
    w = 0;
8238
#else
8239
    l = 0;
8240
    h = 0;
8241
#endif
8242
508M
    for (i = 0; i < max_size; i++) {
8243
        /* Digits past 'used' are not initialized: each mask wraps from all
         * ones to zero once i reaches that operand's used count, so those
         * digits contribute nothing. */
8244
462M
        mask_a += (i == a->used);
8245
462M
        mask_b += (i == b->used);
8246
8247
462M
    #ifndef SQR_MUL_ASM
8248
        /* Add a to and subtract b from current value. */
8249
462M
        w         += a->dp[i] & mask_a;
8250
462M
        w         -= b->dp[i] & mask_b;
8251
        /* Store low digit in result. */
8252
462M
        r->dp[i]   = (sp_int_digit)w;
8253
        /* Keep only the borrow for the next digit. */
8254
462M
        w        >>= DIGIT_BIT;
8255
    #else
8256
        /* Add a and subtract b from current value. */
8257
        t = a->dp[i] & mask_a;
8258
        SP_ASM_ADDC_REG(l, h, t);
8259
        t = b->dp[i] & mask_b;
8260
        SP_ASM_SUBB_REG(l, h, t);
8261
        /* Store low digit in result. */
8262
        r->dp[i] = l;
8263
        /* Move high digit down. */
8264
        l = h;
8265
        /* High digit is 0 when positive or -1 on negative. */
8266
        h = (sp_int_digit)0 - (l >> (SP_WORD_SIZE - 1));
8267
    #endif
8268
462M
    }
8269
    /* When w is negative then we need to add modulus to make result
8270
     * positive. mask is all ones in that case and zero otherwise. */
8271
45.2M
#ifndef SQR_MUL_ASM
8272
45.2M
    mask = (sp_int_digit)0 - (w < 0);
8273
#else
8274
    mask = h;
8275
#endif
8276
8277
    /* Constant time, conditionally, add modulus to difference. */
8278
45.2M
#ifndef SQR_MUL_ASM
8279
45.2M
    w = 0;
8280
#else
8281
    l = 0;
8282
#endif
    /* This pass covers m->used digits, not max_size. */
8283
508M
    for (i = 0; i < m->used; i++) {
8284
462M
    #ifndef SQR_MUL_ASM
8285
        /* Add result and conditionally modulus to current value. */
8286
462M
        w         += r->dp[i];
8287
462M
        w         += m->dp[i] & mask;
8288
        /* Store low digit in result. */
8289
462M
        r->dp[i]   = (sp_int_digit)w;
8290
        /* Keep only the carry for the next digit. */
8291
462M
        w        >>= DIGIT_BIT;
8292
    #else
8293
        h = 0;
8294
        /* Add result and conditionally modulus to current value. */
8295
        SP_ASM_ADDC(l, h, r->dp[i]);
8296
        t = m->dp[i] & mask;
8297
        SP_ASM_ADDC_REG(l, h, t);
8298
        /* Store low digit in result. */
8299
        r->dp[i] = l;
8300
        /* Move high digit down. */
8301
        l = h;
8302
    #endif
8303
462M
    }
8304
    /* Result will always have digits equal to or less than those in
8305
     * modulus. i equals m->used here. */
8306
45.2M
    r->used = (sp_size_t)i;
8307
45.2M
#ifdef WOLFSSL_SP_INT_NEGATIVE
8308
45.2M
    r->sign = MP_ZPOS;
8309
45.2M
#endif /* WOLFSSL_SP_INT_NEGATIVE */
8310
    /* Remove leading zeros without branching on the digit values. */
8311
45.2M
    sp_clamp_ct(r);
8312
45.2M
}
8313
#endif
8314
8315
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
8316
/* Sub b from a modulo m: r = (a - b) % m
8317
 * Result is always positive.
8318
 *
8319
 * r = a - b (mod m) - constant time (a < m and b < m, a, b and m are positive)
8320
 *
8321
 * Assumes a, b, m and r are not NULL.
8322
 * m and r must not be the same pointer.
8323
 *
8324
 * @param  [in]   a  SP integer to subtract from
8325
 * @param  [in]   b  SP integer to subtract.
8326
 * @param  [in]   m  SP integer that is the modulus.
8327
 * @param  [out]  r  SP integer to hold result.
8328
 *
8329
 * @return  MP_OKAY on success.
 * @return  MP_VAL when r is too small to hold m->used digits or r is m.
8330
 */
8331
int sp_submod_ct(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
8332
81.9M
{
8333
81.9M
    int err = MP_OKAY;
8334
8335
    /* Result must have room for as many digits as the modulus uses. */
8336
81.9M
    if (m->used > r->size) {
8337
32
        err = MP_VAL;
8338
32
    }
8339
    /* Result must not be the same object as the modulus. */
8340
81.9M
    if ((err == MP_OKAY) && (r == m)) {
8341
11
        err = MP_VAL;
8342
11
    }
8343
8344
81.9M
    if (err == MP_OKAY) {
8345
#if 0
8346
        sp_print(a, "a");
8347
        sp_print(b, "b");
8348
        sp_print(m, "m");
8349
#endif
8350
8351
        /* Subtract over m->used digits of a and b. */
81.9M
        _sp_submod_ct(a, b, m, m->used, r);
8352
8353
#if 0
8354
        sp_print(r, "rms");
8355
#endif
8356
81.9M
    }
8357
8358
81.9M
    return err;
8359
81.9M
}
8360
#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
8361
8362
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC) && \
8363
    defined(WOLFSSL_ECC_BLIND_K)
8364
/* XOR a with b keeping the low len * 8 bits: r = (a ^ b) mod 2^(len * 8)
 *
 * Does nothing when a, b or r is NULL.
 *
 * NOTE(review): r->used is derived from len alone. There is no check against
 * r->size, and digits of a and b are read up to that count whatever their
 * own used values are - callers must guarantee this is valid.
 *
 * @param  [in]   a    SP integer to XOR.
 * @param  [in]   b    SP integer to XOR with.
 * @param  [in]   len  Length in bytes of the values (len * 8 bits are kept).
 * @param  [out]  r    SP integer to hold result.
 */
void sp_xor_ct(const sp_int* a, const sp_int* b, int len, sp_int* r)
8365
{
8366
    if ((a != NULL) && (b != NULL) && (r != NULL)) {
8367
        unsigned int i;
8368
        /* Number of digits needed for len * 8 bits, rounded up. */
8369
        r->used = (len * 8 + SP_WORD_SIZE - 1) / SP_WORD_SIZE;
8370
        for (i = 0; i < r->used; i++) {
8371
            r->dp[i] = a->dp[i] ^ b->dp[i];
8372
        }
        /* Bits of the top digit that belong to the value; 0 means all. */
8373
        i = (len * 8) % SP_WORD_SIZE;
8374
        if (i > 0) {
            /* Clear the bits above the requested length. */
8375
            r->dp[r->used - 1] &= ((sp_int_digit)1 << i) - 1;
8376
        }
8377
        /* Remove leading zeros. */
8378
        sp_clamp_ct(r);
8379
    }
8380
}
8381
#endif
8382
8383
/********************
8384
 * Shifting functions
8385
 ********************/
8386
8387
#if !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
8388
    defined(WC_RSA_BLINDING) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
8389
/* Left shift the multi-precision number by a number of digits.
8390
 *
8391
 * @param  [in,out]  a  SP integer to shift.
8392
 * @param  [in]      s  Number of digits to shift.
8393
 *
8394
 * @return  MP_OKAY on success.
8395
 * @return  MP_VAL when a is NULL, s is negative or the result is too big.
8396
 */
8397
int sp_lshd(sp_int* a, int s)
8398
41
{
8399
41
    int err = MP_OKAY;
8400
8401
    /* Validate parameters. */
8402
41
    if ((a == NULL) || (s < 0)) {
8403
0
        err = MP_VAL;
8404
0
    }
8405
    /* The shifted number uses a->used + s digits; that must fit in a->size. */
8406
41
    if ((err == MP_OKAY) && (a->used + (unsigned int)s > a->size)) {
8407
3
        err = MP_VAL;
8408
3
    }
8409
41
    if (err == MP_OKAY) {
8410
        /* Move the used digits up by s places (regions may overlap). */
8411
38
        XMEMMOVE(a->dp + s, a->dp, a->used * (word32)SP_WORD_SIZEOF);
8412
        /* Back fill the s vacated low digits with zeros. */
8413
38
        XMEMSET(a->dp, 0, (size_t)s * SP_WORD_SIZEOF);
8414
        /* Update used. */
8415
38
        a->used = (sp_size_t)(a->used + s);
8416
        /* Remove leading zeros. */
8417
38
        sp_clamp(a);
8418
38
    }
8419
8420
41
    return err;
8421
41
}
8422
#endif
8423
8424
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
8425
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
8426
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
8427
/* Left shift the multi-precision number by n bits.
8428
 * Bits may be larger than the word size.
8429
 *
8430
 * Used by sp_mul_2d() and other internal functions.
8431
 *
8432
 * @param  [in,out]  a  SP integer to shift.
8433
 * @param  [in]      n  Number of bits to shift left.
8434
 *
8435
 * @return  MP_OKAY on success.
8436
 * @return  MP_VAL when the result is too big.
8437
 */
8438
static int sp_lshb(sp_int* a, int n)
8439
33.6M
{
8440
33.6M
    int err = MP_OKAY;
8441
8442
    /* Nothing is done when a has no used digits. */
33.6M
    if (a->used != 0) {
8443
        /* Calculate number of whole digits to shift.
         * NOTE(review): n is converted to sp_size_t before the shift, so a
         * bit count wider than sp_size_t would be truncated first; sp_rshb()
         * shifts before converting - confirm callers keep n in range. */
8444
33.6M
        sp_size_t s = (sp_size_t)n >> SP_WORD_SHIFT;
8445
8446
        /* Need a->used + s to be less than a->size: one digit is kept spare
         * for the new top digit that a bit shift can produce. */
8447
33.6M
        if (a->used + s >= a->size) {
8448
34
            err = MP_VAL;
8449
34
        }
8450
33.6M
        if (err == MP_OKAY) {
8451
            /* Get count of bits to move in digit. */
8452
33.6M
            n &= (int)SP_WORD_MASK;
8453
            /* Non-zero bit count: digits must be shifted and recombined. */
8454
33.6M
            if (n != 0) {
8455
33.5M
                unsigned int i;
8456
8457
                /* Shift up starting at most significant digit. */
8458
                /* Bits pushed out of the current top digit. */
8459
33.5M
                sp_int_digit v = a->dp[a->used - 1] >> (SP_WORD_SIZE - n);
8460
                /* Each digit takes its own low bits shifted up plus the top
                 * bits of the digit below, stored s places higher. */
8461
338M
                for (i = a->used - 1U; i >= 1U; i--) {
8462
304M
                    a->dp[i + s] = (a->dp[i] << n) |
8463
304M
                                   (a->dp[i - 1] >> (SP_WORD_SIZE - n));
8464
304M
                }
8465
                /* Shift up least significant digit. */
8466
33.5M
                a->dp[s] = a->dp[0] << n;
8467
                /* Add new high digit unless zero. */
8468
33.5M
                if (v != 0) {
8469
13.9M
                    a->dp[a->used + s] = v;
8470
13.9M
                    a->used++;
8471
13.9M
                }
8472
33.5M
            }
8473
            /* Whole digits only; nothing to move when s is zero. */
8474
110k
            else if (s > 0) {
8475
                /* Move up digits. */
8476
30
                XMEMMOVE(a->dp + s, a->dp, a->used * (word32)SP_WORD_SIZEOF);
8477
30
            }
8478
8479
            /* Update used digit count. */
8480
33.6M
            a->used = (sp_size_t)(a->used + s);
8481
            /* Back fill the s vacated low digits with zeros. */
8482
33.6M
            XMEMSET(a->dp, 0, (word32)SP_WORD_SIZEOF * s);
8483
33.6M
        }
8484
33.6M
    }
8485
8486
33.6M
    return err;
8487
33.6M
}
8488
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
8489
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY && !WOLFSSL_RSA_PUBLIC_ONLY) */
8490
8491
#ifdef WOLFSSL_SP_MATH_ALL
8492
/* Shift a right by c digits: a = a >> (c * SP_WORD_SIZE)
8493
 *
8494
 * @param  [in, out] a  SP integer to shift.
8495
 * @param  [in]      c  Number of digits to shift.
8496
 */
8497
void sp_rshd(sp_int* a, int c)
8498
92
{
8499
    /* Do shift only when there is an SP int and a positive digit count. */
8500
92
    if ((a != NULL) && (c > 0)) {
8501
        /* Make zero if shift removes all digits. */
8502
55
        if ((sp_size_t)c >= a->used) {
8503
30
            _sp_zero(a);
8504
30
        }
8505
25
        else {
8506
25
            sp_size_t i;
8507
8508
            /* Update used digits count. */
8509
25
            a->used = (sp_size_t)(a->used - c);
8510
            /* Move digits down: c is advanced as the source index. */
8511
394
            for (i = 0; i < a->used; i++, c++) {
8512
369
                a->dp[i] = a->dp[c];
8513
369
            }
8514
25
        }
8515
55
    }
8516
92
}
8517
#endif /* WOLFSSL_SP_MATH_ALL */
8518
8519
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
8520
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
8521
    defined(WOLFSSL_HAVE_SP_DH)
8522
/* Shift a right by n bits into r: r = a >> n
8523
 *
 * r may be the same object as a.
 * NOTE(review): r is dereferenced without a NULL check.
 *
8524
 * @param  [in]   a  SP integer to shift.
8525
 * @param  [in]   n  Number of bits to shift.
8526
 * @param  [out]  r  SP integer to store result in.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a is NULL, n is negative or r is too small to hold
 *          the result.
8527
 */
8528
int sp_rshb(const sp_int* a, int n, sp_int* r)
8529
242M
{
8530
242M
    int err = MP_OKAY;
8531
    /* Number of digits to shift down. */
8532
242M
    sp_size_t i;
8533
8534
242M
    if ((a == NULL) || (n < 0)) {
8535
0
        err = MP_VAL;
8536
0
    }
8537
    /* Handle case where shifting out all digits: result is zero. */
8538
242M
    else if ((i = (sp_size_t)(n >> SP_WORD_SHIFT)) >= a->used) {
8539
656k
        _sp_zero(r);
8540
656k
    }
8541
    /* Remaining digits must fit in r.
     * Change callers when more error cases returned. */
8542
241M
    else if ((err == MP_OKAY) && (a->used - i > r->size)) {
8543
9
        err = MP_VAL;
8544
9
    }
8545
241M
    else if (err == MP_OKAY) {
8546
241M
        sp_size_t j;
8547
8548
        /* Number of bits to shift in digits. */
8549
241M
        n &= SP_WORD_SIZE - 1;
8550
        /* Whole-digit shift: a plain copy of the remaining digits. */
8551
241M
        if (n == 0) {
8552
            /* Set the count of used digits. */
8553
118M
            r->used = (sp_size_t)(a->used - i);
8554
            /* Move digits down; use the overlap-safe move when in place. */
8555
118M
            if (r == a) {
8556
118M
                XMEMMOVE(r->dp, r->dp + i, (word32)SP_WORD_SIZEOF * r->used);
8557
118M
            }
8558
170
            else {
8559
170
                XMEMCPY(r->dp, a->dp + i, (word32)SP_WORD_SIZEOF * r->used);
8560
170
            }
8561
118M
        }
8562
122M
        else {
8563
            /* Move the bits down starting at least significant digit: each
             * result digit combines the high bits of one source digit with
             * the low bits of the next one up. */
8564
899M
            for (j = 0; j < (sp_size_t)(a->used - 1 - i); j++)
8565
776M
                r->dp[j] = (a->dp[j+i] >> n) |
8566
776M
                    (a->dp[j+i+1] << (SP_WORD_SIZE - n));
8567
            /* Most significant digit has no higher digit to pull from. */
8568
122M
            r->dp[j] = a->dp[j+i] >> n;
8569
            /* Count the top digit only when it is not zero. */
8570
122M
            r->used = (sp_size_t)(j + (r->dp[j] > 0));
8571
122M
        }
8572
241M
#ifdef WOLFSSL_SP_INT_NEGATIVE
8573
241M
        if (sp_iszero(r)) {
8574
            /* Set zero sign. */
8575
16.0k
            r->sign = MP_ZPOS;
8576
16.0k
        }
8577
241M
        else {
8578
            /* Retain sign. */
8579
241M
            r->sign = a->sign;
8580
241M
        }
8581
241M
#endif
8582
241M
    }
8583
8584
242M
    return err;
8585
242M
}
8586
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
8587
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || WOLFSSL_HAVE_SP_DH */
8588
8589
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
8590
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
8591
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
8592
/* Subtract d once from the top d->used digits of a when they are not less
 * than d, and add one to the matching quotient digit.
 *
 * The index arithmetic uses a->used - d->used, so a is expected to have at
 * least as many used digits as d.
 *
 * @param  [in, out] a  SP integer that is the dividend/remainder.
 * @param  [in]      d  SP integer that is the divisor.
 * @param  [in, out] r  SP integer that is the quotient being built.
 */
static void _sp_div_same_size(sp_int* a, const sp_int* d, sp_int* r)
8593
44.1M
{
8594
44.1M
    sp_size_t i;
8595
8596
    /* Compare top digits of dividend with those of divisor, from the most
     * significant down to index 1. */
8597
44.4M
    for (i = (sp_size_t)(d->used - 1U); i > 0; i--) {
8598
        /* Stop at the first digit that differs. */
8599
27.9M
        if (a->dp[a->used - d->used + i] != d->dp[i]) {
8600
27.6M
            break;
8601
27.6M
        }
8602
27.9M
    }
8603
    /* i is now the highest differing digit, or 0 when all higher digits
     * matched. Check if top dividend is greater than or equal to divisor. */
8604
44.1M
    if (a->dp[a->used - d->used + i] >= d->dp[i]) {
8605
        /* Update quotient result. */
8606
83.8k
        r->dp[a->used - d->used] += 1;
8607
        /* Get 'used' to restore - ensure zeros put into quotient. */
8608
83.8k
        i = a->used;
8609
        /* Subtract d from top of a, in place, at that digit offset. */
8610
83.8k
        _sp_sub_off(a, d, a, (sp_size_t)(a->used - d->used));
8611
        /* Restore 'used' on remainder. */
8612
83.8k
        a->used = i;
8613
83.8k
    }
8614
44.1M
}
8615
8616
/* Divide a by d and return the quotient in r and the remainder in a.
8617
 *   r = a / d; a = a % d
8618
 *
8619
 * Note: a is constantly having multiples of d subtracted.
8620
 *
8621
 * @param  [in, out] a      SP integer to be divided and remainder on out.
8622
 * @param  [in]      d      SP integer to divide by.
8623
 * @param  [out]     r      SP integer that is the quotient.
8624
 * @param  [out]     trial  SP integer that is product in trial division.
8625
 *
8626
 * @return  MP_OKAY on success.
8627
 * @return  MP_VAL when operation fails - only when compiling small code.
8628
 */
8629
static int _sp_div_impl(sp_int* a, const sp_int* d, sp_int* r, sp_int* trial)
8630
22.0M
{
8631
22.0M
    int err = MP_OKAY;
8632
22.0M
    sp_size_t i;
8633
#ifdef WOLFSSL_SP_SMALL
8634
    int c;
8635
#else
8636
22.0M
    sp_size_t j;
8637
22.0M
    sp_size_t o;
8638
22.0M
    #ifndef SQR_MUL_ASM
8639
22.0M
    sp_int_sword sw;
8640
    #else
8641
    sp_int_digit sl;
8642
    sp_int_digit sh;
8643
    sp_int_digit st;
8644
    #endif
8645
22.0M
#endif /* WOLFSSL_SP_SMALL */
8646
22.0M
    sp_int_digit t;
8647
22.0M
    sp_int_digit dt;
8648
8649
    /* Set result size to clear. */
8650
22.0M
    r->used = (sp_size_t)(a->used - d->used + 1);
8651
    /* Set all potentially used digits to zero. */
8652
97.1M
    for (i = 0; i < r->used; i++) {
8653
75.0M
        r->dp[i] = 0;
8654
75.0M
    }
8655
22.0M
#ifdef WOLFSSL_SP_INT_NEGATIVE
8656
22.0M
    r->sign = MP_ZPOS;
8657
22.0M
#endif
8658
    /* Get the most significant digit (will have top bit set). */
8659
22.0M
    dt = d->dp[d->used-1];
8660
8661
    /* Handle when a >= d ^ (2 ^ (SP_WORD_SIZE * x)). */
8662
22.0M
    _sp_div_same_size(a, d, r);
8663
8664
    /* Keep subtracting multiples of d as long as the digit count of a is
8665
     * greater than equal to d.
8666
     */
8667
75.0M
    for (i = (sp_size_t)(a->used - 1U); i >= d->used; i--) {
8668
        /* When top digits equal, guestimate maximum multiplier.
8669
         * Worst case, multiplier is actually SP_DIGIT_MAX - 1.
8670
         * That is, for w (word size in bits) > 1, n > 1, let:
8671
         *   a = 2^((n+1)*w-1), d = 2^(n*w-1) + 2^((n-1)*w) - 1, t = 2^w - 2
8672
         * Then,
8673
         *     d * t
8674
         *   = (2^(n*w-1) + 2^((n-1)*w) - 1) * (2^w - 2)
8675
         *   = 2^((n+1)*w-1) - 2^(n*w) + 2^(n*w) - 2^((n-1)*w+1) - 2^w + 2
8676
         *   = 2^((n+1)*w-1) - 2^((n-1)*w+1) - 2^w + 2
8677
         *   = a - 2^((n-1)*w+1) - 2^w + 2
8678
         * d > 2^((n-1)*w+1) + 2^w - 2, when w > 1, n > 1
8679
         */
8680
53.0M
        if (a->dp[i] == dt) {
8681
176k
            t = SP_DIGIT_MAX;
8682
176k
        }
8683
52.8M
        else {
8684
            /* Calculate trial quotient by dividing top word of dividend by top
8685
             * digit of divisor.
8686
             * Some implementations segfault when quotient > SP_DIGIT_MAX.
8687
             * Implementations in assembly, using builtins or using
8688
             * digits only (WOLFSSL_SP_DIV_WORD_HALF).
8689
             */
8690
52.8M
            t = sp_div_word(a->dp[i], a->dp[i-1], dt);
8691
52.8M
        }
8692
#ifdef WOLFSSL_SP_SMALL
8693
        do {
8694
            /* Calculate trial from trial quotient. */
8695
            err = _sp_mul_d(d, t, trial, i - d->used);
8696
            if (err != MP_OKAY) {
8697
                break;
8698
            }
8699
            /* Check if trial is bigger. */
8700
            c = _sp_cmp_abs(trial, a);
8701
            if (c == MP_GT) {
8702
                /* Decrement trial quotient and try again. */
8703
                t--;
8704
            }
8705
        }
8706
        while (c == MP_GT);
8707
8708
        if (err != MP_OKAY) {
8709
            break;
8710
        }
8711
8712
        /* Subtract the trial and add qoutient to result. */
8713
        _sp_sub_off(a, trial, a, 0);
8714
        r->dp[i - d->used] += t;
8715
        /* Handle overflow of digit. */
8716
        if (r->dp[i - d->used] < t) {
8717
            r->dp[i + 1 - d->used]++;
8718
        }
8719
#else
8720
        /* Index of lowest digit trial is subtracted from. */
8721
53.0M
        o = (sp_size_t)(i - d->used);
8722
63.4M
        do {
8723
63.4M
        #ifndef SQR_MUL_ASM
8724
63.4M
            sp_int_word tw = 0;
8725
        #else
8726
            sp_int_digit tl = 0;
8727
            sp_int_digit th = 0;
8728
        #endif
8729
8730
            /* Multiply divisor by trial quotient. */
8731
1.58G
            for (j = 0; j < d->used; j++) {
8732
1.51G
            #ifndef SQR_MUL_ASM
8733
1.51G
                tw += (sp_int_word)d->dp[j] * t;
8734
1.51G
                trial->dp[j] = (sp_int_digit)tw;
8735
1.51G
                tw >>= SP_WORD_SIZE;
8736
            #else
8737
                SP_ASM_MUL_ADD_NO(tl, th, d->dp[j], t);
8738
                trial->dp[j] = tl;
8739
                tl = th;
8740
                th = 0;
8741
            #endif
8742
1.51G
            }
8743
63.4M
          #ifndef SQR_MUL_ASM
8744
63.4M
            trial->dp[j] = (sp_int_digit)tw;
8745
          #else
8746
            trial->dp[j] = tl;
8747
          #endif
8748
8749
            /* Check trial quotient isn't larger than dividend. */
8750
101M
            for (j = d->used; j > 0; j--) {
8751
96.8M
                if (trial->dp[j] != a->dp[j + o]) {
8752
58.5M
                    break;
8753
58.5M
                }
8754
96.8M
            }
8755
            /* Decrement trial quotient if larger and try again. */
8756
63.4M
            if (trial->dp[j] > a->dp[j + o]) {
8757
10.3M
                t--;
8758
10.3M
            }
8759
63.4M
        }
8760
63.4M
        while (trial->dp[j] > a->dp[j + o]);
8761
8762
53.0M
    #ifndef SQR_MUL_ASM
8763
53.0M
        sw = 0;
8764
    #else
8765
        sl = 0;
8766
        sh = 0;
8767
    #endif
8768
        /* Subtract trial - don't need to update used. */
8769
1.26G
        for (j = 0; j <= d->used; j++) {
8770
1.21G
        #ifndef SQR_MUL_ASM
8771
1.21G
            sw += a->dp[j + o];
8772
1.21G
            sw -= trial->dp[j];
8773
1.21G
            a->dp[j + o] = (sp_int_digit)sw;
8774
1.21G
            sw >>= SP_WORD_SIZE;
8775
        #else
8776
            st = a->dp[j + o];
8777
            SP_ASM_ADDC(sl, sh, st);
8778
            st = trial->dp[j];
8779
            SP_ASM_SUBB(sl, sh, st);
8780
            a->dp[j + o] = sl;
8781
            sl = sh;
8782
            sh = (sp_int_digit)0 - (sl >> (SP_WORD_SIZE - 1));
8783
        #endif
8784
1.21G
        }
8785
8786
53.0M
        r->dp[o] = t;
8787
53.0M
#endif /* WOLFSSL_SP_SMALL */
8788
53.0M
    }
8789
    /* Update used. */
8790
22.0M
    a->used = (sp_size_t)(i + 1U);
8791
22.0M
    if (a->used == d->used) {
8792
        /* Finish div now that length of dividend is same as divisor. */
8793
22.0M
        _sp_div_same_size(a, d, r);
8794
22.0M
    }
8795
8796
22.0M
    return err;
8797
22.0M
}
8798
8799
/* Divide a by d and return the quotient in r and the remainder in rem.
8800
 *   r = a / d; rem = a % d
8801
 *
8802
 * @param  [in]   a     SP integer to be divided.
8803
 * @param  [in]   d     SP integer to divide by.
8804
 * @param  [out]  r     SP integer that is the quotient.
8805
 * @param  [out]  rem   SP integer that is the remainder.
8806
 * @param  [in]   used  Number of digits in temporaries to use.
8807
 *
8808
 * @return  MP_OKAY on success.
8809
 * @return  MP_MEM when dynamic memory allocation fails.
8810
 */
8811
static int _sp_div(const sp_int* a, const sp_int* d, sp_int* r, sp_int* rem,
8812
    unsigned int used)
8813
22.4M
{
8814
22.4M
    int err = MP_OKAY;
8815
22.4M
    int ret;
8816
22.4M
    int done = 0;
8817
22.4M
    int s = 0;
8818
22.4M
    sp_int* sa = NULL;
8819
22.4M
    sp_int* sd = NULL;
8820
22.4M
    sp_int* tr = NULL;
8821
22.4M
    sp_int* trial = NULL;
8822
22.4M
#ifdef WOLFSSL_SP_INT_NEGATIVE
8823
22.4M
    sp_uint8 signA = MP_ZPOS;
8824
22.4M
    sp_uint8 signD = MP_ZPOS;
8825
22.4M
#endif /* WOLFSSL_SP_INT_NEGATIVE */
8826
    /* Intermediates will always be less than or equal to dividend. */
8827
22.4M
    DECL_SP_INT_ARRAY(td, used, 4);
8828
8829
22.4M
#ifdef WOLFSSL_SP_INT_NEGATIVE
8830
    /* Cache sign for results. */
8831
22.4M
    signA = a->sign;
8832
22.4M
    signD = d->sign;
8833
22.4M
#endif /* WOLFSSL_SP_INT_NEGATIVE */
8834
8835
    /* Handle simple case of: dividend < divisor. */
8836
22.4M
    ret = _sp_cmp_abs(a, d);
8837
22.4M
    if (ret == MP_LT) {
8838
        /* a = 0 * d + a */
8839
5.63M
        if ((rem != NULL) && (a != rem)) {
8840
4.82M
            _sp_copy(a, rem);
8841
4.82M
        }
8842
5.63M
        if (r != NULL) {
8843
51
            _sp_set(r, 0);
8844
51
        }
8845
5.63M
        done = 1;
8846
5.63M
    }
8847
    /* Handle simple case of: dividend == divisor. */
8848
16.7M
    else if (ret == MP_EQ) {
8849
        /* a = 1 * d + 0 */
8850
720k
        if (rem != NULL) {
8851
720k
            _sp_set(rem, 0);
8852
720k
        }
8853
720k
        if (r != NULL) {
8854
55
            _sp_set(r, 1);
8855
55
        #ifdef WOLFSSL_SP_INT_NEGATIVE
8856
55
            r->sign = (signA == signD) ? MP_ZPOS : MP_NEG;
8857
55
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
8858
55
        }
8859
720k
        done = 1;
8860
720k
    }
8861
16.0M
    else if (sp_count_bits(a) == sp_count_bits(d)) {
8862
        /* a is greater than d but same bit length - subtract. */
8863
2.26M
        if (rem != NULL) {
8864
2.26M
            _sp_sub_off(a, d, rem, 0);
8865
2.26M
        #ifdef WOLFSSL_SP_INT_NEGATIVE
8866
2.26M
            rem->sign = signA;
8867
2.26M
        #endif
8868
2.26M
        }
8869
2.26M
        if (r != NULL) {
8870
2.23M
            _sp_set(r, 1);
8871
2.23M
        #ifdef WOLFSSL_SP_INT_NEGATIVE
8872
2.23M
            r->sign = (signA == signD) ? MP_ZPOS : MP_NEG;
8873
2.23M
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
8874
2.23M
        }
8875
2.26M
        done = 1;
8876
2.26M
    }
8877
8878
    /* Allocate temporary 'sp_int's and assign. */
8879
22.4M
    if ((!done) && (err == MP_OKAY)) {
8880
13.7M
    #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
8881
13.7M
        !defined(WOLFSSL_SP_NO_MALLOC)
8882
13.7M
        unsigned int cnt = 4;
8883
        /* Reuse remainder sp_int where possible. */
8884
13.7M
        if ((rem != NULL) && (rem != d) && (rem->size > a->used)) {
8885
13.7M
            sa = rem;
8886
13.7M
            cnt--;
8887
13.7M
        }
8888
        /* Reuse result sp_int where possible. */
8889
13.7M
        if ((r != NULL) && (r != d)) {
8890
7.86M
            tr = r;
8891
7.86M
            cnt--;
8892
7.86M
        }
8893
        /* Macro always has code associated with it and checks err first. */
8894
13.7M
        ALLOC_SP_INT_ARRAY(td, used, cnt, err, NULL);
8895
    #else
8896
        ALLOC_SP_INT_ARRAY(td, used, 4, err, NULL);
8897
    #endif
8898
13.7M
    }
8899
22.4M
    if ((!done) && (err == MP_OKAY)) {
8900
13.7M
    #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
8901
13.7M
        !defined(WOLFSSL_SP_NO_MALLOC)
8902
13.7M
        int i = 2;
8903
8904
        /* Set to temporary when not reusing. */
8905
13.7M
        if (sa == NULL) {
8906
913
            sa = td[i++];
8907
913
            _sp_init_size(sa, used);
8908
913
        }
8909
13.7M
        if (tr == NULL) {
8910
5.92M
            tr = td[i];
8911
5.92M
            _sp_init_size(tr, (unsigned int)(a->used - d->used + 2));
8912
5.92M
        }
8913
    #else
8914
        sa    = td[2];
8915
        tr    = td[3];
8916
8917
        _sp_init_size(sa, used);
8918
        _sp_init_size(tr, (unsigned int)(a->used - d->used + 2));
8919
    #endif
8920
13.7M
        sd    = td[0];
8921
13.7M
        trial = td[1];
8922
8923
        /* Initialize sizes to minimal values. */
8924
13.7M
        _sp_init_size(sd, (sp_size_t)(d->used + 1U));
8925
13.7M
        _sp_init_size(trial, used);
8926
8927
        /* Move divisor to top of word. Adjust dividend as well. */
8928
13.7M
        s = sp_count_bits(d);
8929
13.7M
        s = SP_WORD_SIZE - (s & (int)SP_WORD_MASK);
8930
13.7M
        _sp_copy(a, sa);
8931
        /* Only shift if top bit of divisor no set. */
8932
13.7M
        if (s != SP_WORD_SIZE) {
8933
10.8M
            err = sp_lshb(sa, s);
8934
10.8M
            if (err == MP_OKAY) {
8935
10.8M
                _sp_copy(d, sd);
8936
10.8M
                d = sd;
8937
10.8M
                err = sp_lshb(sd, s);
8938
10.8M
            }
8939
10.8M
        }
8940
13.7M
    }
8941
22.4M
    if ((!done) && (err == MP_OKAY) && (d->used > 0)) {
8942
        /* Do division: tr = sa / d, sa = sa % d. */
8943
13.7M
        err = _sp_div_impl(sa, d, tr, trial);
8944
        /* Return the remainder if required. */
8945
13.7M
        if ((err == MP_OKAY) && (rem != NULL)) {
8946
            /* Move result back down if moved up for divisor value. */
8947
13.7M
            if (s != SP_WORD_SIZE) {
8948
10.8M
                (void)sp_rshb(sa, s, sa);
8949
10.8M
            }
8950
13.7M
            _sp_copy(sa, rem);
8951
13.7M
            sp_clamp(rem);
8952
13.7M
        #ifdef WOLFSSL_SP_INT_NEGATIVE
8953
13.7M
            rem->sign = (rem->used == 0) ? MP_ZPOS : signA;
8954
13.7M
        #endif
8955
13.7M
        }
8956
        /* Return the quotient if required. */
8957
13.7M
        if ((err == MP_OKAY) && (r != NULL)) {
8958
7.86M
            _sp_copy(tr, r);
8959
7.86M
            sp_clamp(r);
8960
7.86M
        #ifdef WOLFSSL_SP_INT_NEGATIVE
8961
7.86M
            if ((r->used == 0) || (signA == signD)) {
8962
7.86M
                r->sign = MP_ZPOS;
8963
7.86M
            }
8964
7
            else {
8965
7
                r->sign = MP_NEG;
8966
7
            }
8967
7.86M
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
8968
7.86M
        }
8969
13.7M
    }
8970
8971
22.4M
    FREE_SP_INT_ARRAY(td, NULL);
8972
22.4M
    return err;
8973
22.4M
}
8974
8975
/* Divide a by d and return the quotient in r and the remainder in rem.
8976
 *   r = a / d; rem = a % d
8977
 *
8978
 * @param  [in]   a    SP integer to be divided.
8979
 * @param  [in]   d    SP integer to divide by.
8980
 * @param  [out]  r    SP integer that is the quotient.
8981
 * @param  [out]  rem  SP integer that is the remainder.
8982
 *
8983
 * @return  MP_OKAY on success.
8984
 * @return  MP_VAL when a or d is NULL, r and rem are NULL, or d is 0.
8985
 * @return  MP_MEM when dynamic memory allocation fails.
8986
 */
8987
int sp_div(const sp_int* a, const sp_int* d, sp_int* r, sp_int* rem)
8988
31.5M
{
8989
31.5M
    int err = MP_OKAY;
8990
31.5M
    unsigned int used = 1;
8991
8992
    /* Validate parameters. */
8993
31.5M
    if ((a == NULL) || (d == NULL) || ((r == NULL) && (rem == NULL))) {
8994
0
        err = MP_VAL;
8995
0
    }
8996
    /* a / 0 = infinity. */
8997
31.5M
    if ((err == MP_OKAY) && sp_iszero(d)) {
8998
574
        err = MP_VAL;
8999
574
    }
9000
    /* Ensure quotient result has enough memory. */
9001
31.5M
    if ((err == MP_OKAY) && (r != NULL) && (r->size < a->used - d->used + 2)) {
9002
23
        err = MP_VAL;
9003
23
    }
9004
31.5M
    if ((err == MP_OKAY) && (rem != NULL)) {
9005
        /* Ensure remainder has enough memory. */
9006
31.5M
        if ((a->used <= d->used) && (rem->size < a->used + 1)) {
9007
47
            err = MP_VAL;
9008
47
        }
9009
31.5M
        else if ((a->used > d->used) && (rem->size < d->used + 1)) {
9010
17
            err = MP_VAL;
9011
17
        }
9012
31.5M
    }
9013
31.5M
    if (err == MP_OKAY) {
9014
31.5M
        if (a->used == SP_INT_DIGITS) {
9015
            /* May need to shift number being divided left into a new word. */
9016
33
            int bits = SP_WORD_SIZE - (sp_count_bits(d) % SP_WORD_SIZE);
9017
33
            if ((bits != SP_WORD_SIZE) &&
9018
28
                    (sp_count_bits(a) + bits > (int)(SP_INT_DIGITS * SP_WORD_SIZE))) {
9019
11
                err = MP_VAL;
9020
11
            }
9021
22
            else {
9022
22
                used = SP_INT_DIGITS;
9023
22
            }
9024
33
        }
9025
31.5M
        else {
9026
31.5M
            used = (sp_size_t)(a->used + 1U);
9027
31.5M
        }
9028
31.5M
    }
9029
9030
31.5M
    if (err == MP_OKAY) {
9031
    #if 0
9032
        sp_print(a, "a");
9033
        sp_print(d, "b");
9034
    #endif
9035
        /* Do operation. */
9036
31.5M
        err = _sp_div(a, d, r, rem, used);
9037
    #if 0
9038
        if (err == MP_OKAY) {
9039
            if (rem != NULL) {
9040
                sp_print(rem, "rdr");
9041
            }
9042
            if (r != NULL) {
9043
                sp_print(r, "rdw");
9044
            }
9045
        }
9046
    #endif
9047
31.5M
    }
9048
9049
31.5M
    return err;
9050
31.5M
}
9051
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
9052
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
9053
9054
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
9055
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
9056
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
9057
#ifndef FREESCALE_LTC_TFM
9058
#ifdef WOLFSSL_SP_INT_NEGATIVE
9059
/* Calculate the remainder of dividing a by m: r = a mod m. r is m.
9060
 *
9061
 * @param  [in]   a  SP integer to reduce.
9062
 * @param  [in]   m  SP integer that is the modulus.
9063
 * @param  [out]  r  SP integer to store result in.
9064
 *
9065
 * @return  MP_OKAY on success.
9066
 * @return  MP_MEM when dynamic memory allocation fails.
9067
 */
9068
static int _sp_mod(const sp_int* a, const sp_int* m, sp_int* r)
9069
80
{
9070
80
    int err = MP_OKAY;
9071
    /* Remainder will start as a. */
9072
80
    DECL_SP_INT(t, (a == NULL) ? 1 : a->used + 1);
9073
9074
    /* In case remainder is modulus - allocate temporary. */
9075
80
    ALLOC_SP_INT(t, a->used + 1, err, NULL);
9076
80
    if (err == MP_OKAY) {
9077
64
        _sp_init_size(t, a->used + 1);
9078
        /* Use divide to calculate remainder and don't get quotient. */
9079
64
        err = sp_div(a, m, NULL, t);
9080
64
    }
9081
80
    if (err == MP_OKAY) {
9082
        /* Make remainder positive and copy into result. */
9083
53
        if ((!sp_iszero(t)) && (t->sign != m->sign)) {
9084
22
            err = sp_add(t, m, r);
9085
22
        }
9086
31
        else {
9087
31
            _sp_copy(t, r);
9088
31
        }
9089
53
    }
9090
80
    FREE_SP_INT(t, NULL);
9091
9092
80
    return err;
9093
80
}
9094
#endif
9095
9096
/* Calculate the remainder of dividing a by m: r = a mod m.
9097
 *
9098
 * @param  [in]   a  SP integer to reduce.
9099
 * @param  [in]   m  SP integer that is the modulus.
9100
 * @param  [out]  r  SP integer to store result in.
9101
 *
9102
 * @return  MP_OKAY on success.
9103
 * @return  MP_VAL when a, m or r is NULL or m is 0.
9104
 * @return  MP_MEM when dynamic memory allocation fails.
9105
 */
9106
int sp_mod(const sp_int* a, const sp_int* m, sp_int* r)
9107
11.6M
{
9108
11.6M
    int err = MP_OKAY;
9109
9110
    /* Validate parameters. */
9111
11.6M
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
9112
0
        err = MP_VAL;
9113
0
    }
9114
    /* Ensure a isn't too big a number to operate on. */
9115
11.6M
    else if (a->used >= SP_INT_DIGITS) {
9116
12
        err = MP_VAL;
9117
12
    }
9118
9119
#ifndef WOLFSSL_SP_INT_NEGATIVE
9120
    if (err == MP_OKAY) {
9121
        /* Use divide to calculate remainder and don't get quotient. */
9122
        err = sp_div(a, m, NULL, r);
9123
    }
9124
#else
9125
11.6M
    if ((err == MP_OKAY) && (r != m)) {
9126
11.6M
        err = sp_div(a, m, NULL, r);
9127
11.6M
        if ((err == MP_OKAY) && (!sp_iszero(r)) && (r->sign != m->sign)) {
9128
16.2k
            err = sp_add(r, m, r);
9129
16.2k
        }
9130
11.6M
    }
9131
92
    else if (err == MP_OKAY) {
9132
80
        err = _sp_mod(a, m, r);
9133
80
    }
9134
11.6M
#endif /* WOLFSSL_SP_INT_NEGATIVE */
9135
9136
11.6M
    return err;
9137
11.6M
}
9138
#endif /* !FREESCALE_LTC_TFM */
9139
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
9140
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY && !WOLFSSL_RSA_PUBLIC_ONLY) */
9141
9142
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
9143
    defined(HAVE_ECC) || !defined(NO_RSA)
9144
9145
/* START SP_MUL implementations. */
9146
/* This code is generated.
9147
 * To generate:
9148
 *   cd scripts/sp/sp_int
9149
 *   ./gen.sh
9150
 * File sp_mul.c contains code.
9151
 */
9152
9153
#ifdef SQR_MUL_ASM
9154
/* Multiply a by b into r where a and b have same no. digits. r = a * b
9155
 *
9156
 * Optimised code for when number of digits in a and b are the same.
9157
 *
9158
 * @param  [in]   a    SP integer to multiply.
9159
 * @param  [in]   b    SP integer to multiply by.
9160
 * @param  [out]  r    SP integer to hold result.
9161
 *
9162
 * @return  MP_OKAY otherwise.
9163
 * @return  MP_MEM when dynamic memory allocation fails.
9164
 */
9165
static int _sp_mul_nxn(const sp_int* a, const sp_int* b, sp_int* r)
9166
{
9167
    int err = MP_OKAY;
9168
    unsigned int i;
9169
    int j;
9170
    unsigned int k;
9171
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9172
    sp_int_digit* t = NULL;
9173
#elif defined(WOLFSSL_SP_DYN_STACK)
9174
    sp_int_digit t[a->used];
9175
#else
9176
    sp_int_digit t[SP_INT_DIGITS / 2];
9177
#endif
9178
9179
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9180
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * a->used, NULL,
9181
        DYNAMIC_TYPE_BIGINT);
9182
    if (t == NULL) {
9183
        err = MP_MEM;
9184
    }
9185
#endif
9186
    if (err == MP_OKAY) {
9187
        sp_int_digit l;
9188
        sp_int_digit h;
9189
        sp_int_digit o;
9190
        const sp_int_digit* dp;
9191
9192
        h = 0;
9193
        l = 0;
9194
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
9195
        t[0] = h;
9196
        h = 0;
9197
        o = 0;
9198
        for (k = 1; k <= (unsigned int)a->used - 1; k++) {
9199
            j = (int)k;
9200
            dp = a->dp;
9201
            for (; j >= 0; dp++, j--) {
9202
                SP_ASM_MUL_ADD(l, h, o, dp[0], b->dp[j]);
9203
            }
9204
            t[k] = l;
9205
            l = h;
9206
            h = o;
9207
            o = 0;
9208
        }
9209
        for (; k <= ((unsigned int)a->used - 1) * 2; k++) {
9210
            i = k - (sp_size_t)(b->used - 1);
9211
            dp = &b->dp[b->used - 1];
9212
            for (; i < a->used; i++, dp--) {
9213
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], dp[0]);
9214
            }
9215
            r->dp[k] = l;
9216
            l = h;
9217
            h = o;
9218
            o = 0;
9219
        }
9220
        r->dp[k] = l;
9221
        XMEMCPY(r->dp, t, a->used * sizeof(sp_int_digit));
9222
        r->used = (sp_size_t)(k + 1);
9223
        sp_clamp(r);
9224
    }
9225
9226
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9227
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
9228
#endif
9229
    return err;
9230
}
9231
9232
/* Multiply a by b into r. r = a * b
9233
 *
9234
 * @param  [in]   a    SP integer to multiply.
9235
 * @param  [in]   b    SP integer to multiply by.
9236
 * @param  [out]  r    SP integer to hold result.
9237
 *
9238
 * @return  MP_OKAY otherwise.
9239
 * @return  MP_MEM when dynamic memory allocation fails.
9240
 */
9241
static int _sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
9242
{
9243
    int err = MP_OKAY;
9244
    sp_size_t i;
9245
    int j;
9246
    sp_size_t k;
9247
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9248
    sp_int_digit* t = NULL;
9249
#elif defined(WOLFSSL_SP_DYN_STACK)
9250
    sp_int_digit t[a->used + b->used];
9251
#else
9252
    sp_int_digit t[SP_INT_DIGITS];
9253
#endif
9254
9255
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9256
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) *
9257
                               (size_t)(a->used + b->used), NULL,
9258
                               DYNAMIC_TYPE_BIGINT);
9259
    if (t == NULL) {
9260
        err = MP_MEM;
9261
    }
9262
#endif
9263
    if (err == MP_OKAY) {
9264
        sp_int_digit l;
9265
        sp_int_digit h;
9266
        sp_int_digit o;
9267
9268
        h = 0;
9269
        l = 0;
9270
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
9271
        t[0] = h;
9272
        h = 0;
9273
        o = 0;
9274
        for (k = 1; k <= (sp_size_t)(b->used - 1); k++) {
9275
            i = 0;
9276
            j = (int)k;
9277
            for (; (i < a->used) && (j >= 0); i++, j--) {
9278
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
9279
            }
9280
            t[k] = l;
9281
            l = h;
9282
            h = o;
9283
            o = 0;
9284
        }
9285
        for (; k <= (sp_size_t)((a->used - 1) + (b->used - 1)); k++) {
9286
            j = (int)(b->used - 1);
9287
            i = (sp_size_t)(k - (sp_size_t)j);
9288
            for (; (i < a->used) && (j >= 0); i++, j--) {
9289
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
9290
            }
9291
            t[k] = l;
9292
            l = h;
9293
            h = o;
9294
            o = 0;
9295
        }
9296
        t[k] = l;
9297
        r->used = (sp_size_t)(k + 1);
9298
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
9299
        sp_clamp(r);
9300
    }
9301
9302
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9303
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
9304
#endif
9305
    return err;
9306
}
9307
#else
9308
/* Multiply a by b into r. r = a * b
9309
 *
9310
 * @param  [in]   a    SP integer to multiply.
9311
 * @param  [in]   b    SP integer to multiply by.
9312
 * @param  [out]  r    SP integer to hold result.
9313
 *
9314
 * @return  MP_OKAY otherwise.
9315
 * @return  MP_MEM when dynamic memory allocation fails.
9316
 */
9317
static int _sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
9318
46.8M
{
9319
46.8M
    int err = MP_OKAY;
9320
46.8M
    sp_size_t i;
9321
46.8M
    int j;
9322
46.8M
    sp_size_t k;
9323
46.8M
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9324
46.8M
    sp_int_digit* t = NULL;
9325
#elif defined(WOLFSSL_SP_DYN_STACK)
9326
    sp_int_digit t[a->used + b->used];
9327
#else
9328
    sp_int_digit t[SP_INT_DIGITS];
9329
#endif
9330
9331
46.8M
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9332
46.8M
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) *
9333
46.8M
                               (size_t)(a->used + b->used), NULL,
9334
46.8M
                               DYNAMIC_TYPE_BIGINT);
9335
46.8M
    if (t == NULL) {
9336
950
        err = MP_MEM;
9337
950
    }
9338
46.8M
#endif
9339
46.8M
    if (err == MP_OKAY) {
9340
46.8M
        sp_int_word w;
9341
46.8M
        sp_int_word l;
9342
46.8M
        sp_int_word h;
9343
46.8M
    #ifdef SP_WORD_OVERFLOW
9344
46.8M
        sp_int_word o;
9345
46.8M
    #endif
9346
9347
46.8M
        w = (sp_int_word)a->dp[0] * b->dp[0];
9348
46.8M
        t[0] = (sp_int_digit)w;
9349
46.8M
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
9350
46.8M
        h = 0;
9351
46.8M
    #ifdef SP_WORD_OVERFLOW
9352
46.8M
        o = 0;
9353
46.8M
    #endif
9354
1.64G
        for (k = 1; (int)k <= ((int)a->used - 1) + ((int)b->used - 1); k++) {
9355
1.59G
            i = (sp_size_t)(k - (b->used - 1));
9356
1.59G
            i &= (sp_size_t)(((unsigned int)i >> (sizeof(i) * 8 - 1)) - 1U);
9357
1.59G
            j = (int)(k - i);
9358
25.1G
            for (; (i < a->used) && (j >= 0); i++, j--) {
9359
23.5G
                w = (sp_int_word)a->dp[i] * b->dp[j];
9360
23.5G
                l += (sp_int_digit)w;
9361
23.5G
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
9362
23.5G
            #ifdef SP_WORD_OVERFLOW
9363
23.5G
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
9364
23.5G
                l &= SP_MASK;
9365
23.5G
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
9366
23.5G
                h &= SP_MASK;
9367
23.5G
            #endif
9368
23.5G
            }
9369
1.59G
            t[k] = (sp_int_digit)l;
9370
1.59G
            l >>= SP_WORD_SIZE;
9371
1.59G
            l += (sp_int_digit)h;
9372
1.59G
            h >>= SP_WORD_SIZE;
9373
1.59G
        #ifdef SP_WORD_OVERFLOW
9374
1.59G
            h += o & SP_MASK;
9375
1.59G
            o >>= SP_WORD_SIZE;
9376
1.59G
        #endif
9377
1.59G
        }
9378
46.8M
        t[k] = (sp_int_digit)l;
9379
46.8M
        r->used = (sp_size_t)(k + 1);
9380
46.8M
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
9381
46.8M
        sp_clamp(r);
9382
46.8M
    }
9383
9384
46.8M
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9385
46.8M
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
9386
46.8M
#endif
9387
46.8M
    return err;
9388
46.8M
}
9389
#endif
9390
9391
#ifndef WOLFSSL_SP_SMALL
9392
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
9393
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
9394
#ifndef SQR_MUL_ASM
9395
/* Multiply a by b and store in r: r = a * b
9396
 *
9397
 * Long-hand implementation.
9398
 *
9399
 * @param  [in]   a  SP integer to multiply.
9400
 * @param  [in]   b  SP integer to multiply.
9401
 * @param  [out]  r  SP integer result.
9402
 *
9403
 * @return  MP_OKAY on success.
9404
 * @return  MP_MEM when dynamic memory allocation fails.
9405
 */
9406
static int _sp_mul_4(const sp_int* a, const sp_int* b, sp_int* r)
9407
{
9408
    int err = MP_OKAY;
9409
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9410
    sp_int_word* w = NULL;
9411
#else
9412
    sp_int_word w[16];
9413
#endif
9414
    const sp_int_digit* da = a->dp;
9415
    const sp_int_digit* db = b->dp;
9416
9417
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9418
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 16, NULL,
9419
        DYNAMIC_TYPE_BIGINT);
9420
    if (w == NULL) {
9421
        err = MP_MEM;
9422
    }
9423
#endif
9424
9425
    if (err == MP_OKAY) {
9426
        w[0] = (sp_int_word)da[0] * db[0];
9427
        w[1] = (sp_int_word)da[0] * db[1];
9428
        w[2] = (sp_int_word)da[1] * db[0];
9429
        w[3] = (sp_int_word)da[0] * db[2];
9430
        w[4] = (sp_int_word)da[1] * db[1];
9431
        w[5] = (sp_int_word)da[2] * db[0];
9432
        w[6] = (sp_int_word)da[0] * db[3];
9433
        w[7] = (sp_int_word)da[1] * db[2];
9434
        w[8] = (sp_int_word)da[2] * db[1];
9435
        w[9] = (sp_int_word)da[3] * db[0];
9436
        w[10] = (sp_int_word)da[1] * db[3];
9437
        w[11] = (sp_int_word)da[2] * db[2];
9438
        w[12] = (sp_int_word)da[3] * db[1];
9439
        w[13] = (sp_int_word)da[2] * db[3];
9440
        w[14] = (sp_int_word)da[3] * db[2];
9441
        w[15] = (sp_int_word)da[3] * db[3];
9442
9443
        r->dp[0] = (sp_int_digit)w[0];
9444
        w[0] >>= SP_WORD_SIZE;
9445
        w[0] += (sp_int_digit)w[1];
9446
        w[0] += (sp_int_digit)w[2];
9447
        r->dp[1] = (sp_int_digit)w[0];
9448
        w[0] >>= SP_WORD_SIZE;
9449
        w[1] >>= SP_WORD_SIZE;
9450
        w[0] += (sp_int_digit)w[1];
9451
        w[2] >>= SP_WORD_SIZE;
9452
        w[0] += (sp_int_digit)w[2];
9453
        w[0] += (sp_int_digit)w[3];
9454
        w[0] += (sp_int_digit)w[4];
9455
        w[0] += (sp_int_digit)w[5];
9456
        r->dp[2] = (sp_int_digit)w[0];
9457
        w[0] >>= SP_WORD_SIZE;
9458
        w[3] >>= SP_WORD_SIZE;
9459
        w[0] += (sp_int_digit)w[3];
9460
        w[4] >>= SP_WORD_SIZE;
9461
        w[0] += (sp_int_digit)w[4];
9462
        w[5] >>= SP_WORD_SIZE;
9463
        w[0] += (sp_int_digit)w[5];
9464
        w[0] += (sp_int_digit)w[6];
9465
        w[0] += (sp_int_digit)w[7];
9466
        w[0] += (sp_int_digit)w[8];
9467
        w[0] += (sp_int_digit)w[9];
9468
        r->dp[3] = (sp_int_digit)w[0];
9469
        w[0] >>= SP_WORD_SIZE;
9470
        w[6] >>= SP_WORD_SIZE;
9471
        w[0] += (sp_int_digit)w[6];
9472
        w[7] >>= SP_WORD_SIZE;
9473
        w[0] += (sp_int_digit)w[7];
9474
        w[8] >>= SP_WORD_SIZE;
9475
        w[0] += (sp_int_digit)w[8];
9476
        w[9] >>= SP_WORD_SIZE;
9477
        w[0] += (sp_int_digit)w[9];
9478
        w[0] += (sp_int_digit)w[10];
9479
        w[0] += (sp_int_digit)w[11];
9480
        w[0] += (sp_int_digit)w[12];
9481
        r->dp[4] = (sp_int_digit)w[0];
9482
        w[0] >>= SP_WORD_SIZE;
9483
        w[10] >>= SP_WORD_SIZE;
9484
        w[0] += (sp_int_digit)w[10];
9485
        w[11] >>= SP_WORD_SIZE;
9486
        w[0] += (sp_int_digit)w[11];
9487
        w[12] >>= SP_WORD_SIZE;
9488
        w[0] += (sp_int_digit)w[12];
9489
        w[0] += (sp_int_digit)w[13];
9490
        w[0] += (sp_int_digit)w[14];
9491
        r->dp[5] = (sp_int_digit)w[0];
9492
        w[0] >>= SP_WORD_SIZE;
9493
        w[13] >>= SP_WORD_SIZE;
9494
        w[0] += (sp_int_digit)w[13];
9495
        w[14] >>= SP_WORD_SIZE;
9496
        w[0] += (sp_int_digit)w[14];
9497
        w[0] += (sp_int_digit)w[15];
9498
        r->dp[6] = (sp_int_digit)w[0];
9499
        w[0] >>= SP_WORD_SIZE;
9500
        w[15] >>= SP_WORD_SIZE;
9501
        w[0] += (sp_int_digit)w[15];
9502
        r->dp[7] = (sp_int_digit)w[0];
9503
9504
        r->used = 8;
9505
        sp_clamp(r);
9506
    }
9507
9508
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9509
    XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
9510
#endif
9511
    return err;
9512
}
9513
#else /* SQR_MUL_ASM */
9514
/* Multiply a by b and store in r: r = a * b
9515
 *
9516
 * Comba implementation.
9517
 *
9518
 * @param  [in]   a  SP integer to multiply.
9519
 * @param  [in]   b  SP integer to multiply.
9520
 * @param  [out]  r  SP integer result.
9521
 *
9522
 * @return  MP_OKAY on success.
9523
 * @return  MP_MEM when dynamic memory allocation fails.
9524
 */
9525
static int _sp_mul_4(const sp_int* a, const sp_int* b, sp_int* r)
9526
{
9527
    sp_int_digit l = 0;
9528
    sp_int_digit h = 0;
9529
    sp_int_digit o = 0;
9530
    sp_int_digit t[4];
9531
9532
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
9533
    t[0] = h;
9534
    h = 0;
9535
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
9536
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
9537
    t[1] = l;
9538
    l = h;
9539
    h = o;
9540
    o = 0;
9541
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
9542
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
9543
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
9544
    t[2] = l;
9545
    l = h;
9546
    h = o;
9547
    o = 0;
9548
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
9549
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
9550
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
9551
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
9552
    t[3] = l;
9553
    l = h;
9554
    h = o;
9555
    o = 0;
9556
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
9557
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
9558
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
9559
    r->dp[4] = l;
9560
    l = h;
9561
    h = o;
9562
    o = 0;
9563
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
9564
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
9565
    r->dp[5] = l;
9566
    l = h;
9567
    h = o;
9568
    SP_ASM_MUL_ADD_NO(l, h, a->dp[3], b->dp[3]);
9569
    r->dp[6] = l;
9570
    r->dp[7] = h;
9571
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
9572
    r->used = 8;
9573
    sp_clamp(r);
9574
9575
    return MP_OKAY;
9576
}
9577
#endif /* SQR_MUL_ASM */
9578
#endif /* SP_WORD_SIZE == 64 */
9579
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
9580
#ifdef SQR_MUL_ASM
9581
/* Multiply a by b and store in r: r = a * b
9582
 *
9583
 * Fully unrolled Comba implementation for 6-digit operands.
9584
 *
9585
 * @param  [in]   a  SP integer to multiply; digits dp[0..5] are read.
9586
 * @param  [in]   b  SP integer to multiply; digits dp[0..5] are read.
9587
 * @param  [out]  r  SP integer result; digits dp[0..11] are written.
9588
 *
9589
 * @return  MP_OKAY always.
9590
 *          NOTE: nothing is allocated here, so MP_MEM is never returned.
9591
 */
9592
static int _sp_mul_6(const sp_int* a, const sp_int* b, sp_int* r)
9593
{
9594
    sp_int_digit l = 0;
9595
    sp_int_digit h = 0;
9596
    sp_int_digit o = 0;
9597
    sp_int_digit t[6]; /* holds result digits 0..5 until the final copy */
9598
9599
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
9600
    t[0] = h; /* NOTE(review): h seems to hold the low digit here; confirm */
9601
    h = 0;
9602
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
9603
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
9604
    t[1] = l;
9605
    l = h; /* column stored: move the accumulator down by one digit */
9606
    h = o;
9607
    o = 0;
9608
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
9609
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
9610
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
9611
    t[2] = l;
9612
    l = h;
9613
    h = o;
9614
    o = 0;
9615
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
9616
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
9617
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
9618
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
9619
    t[3] = l;
9620
    l = h;
9621
    h = o;
9622
    o = 0;
9623
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
9624
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
9625
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
9626
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
9627
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
9628
    t[4] = l;
9629
    l = h;
9630
    h = o;
9631
    o = 0;
9632
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
9633
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
9634
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
9635
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
9636
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
9637
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
9638
    t[5] = l;
9639
    l = h;
9640
    h = o;
9641
    o = 0;
9642
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
9643
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
9644
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
9645
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
9646
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
9647
    r->dp[6] = l; /* index >= 6: never read from a or b in this function */
9648
    l = h;
9649
    h = o;
9650
    o = 0;
9651
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
9652
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
9653
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
9654
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
9655
    r->dp[7] = l;
9656
    l = h;
9657
    h = o;
9658
    o = 0;
9659
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
9660
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
9661
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
9662
    r->dp[8] = l;
9663
    l = h;
9664
    h = o;
9665
    o = 0;
9666
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
9667
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
9668
    r->dp[9] = l;
9669
    l = h;
9670
    h = o;
9671
    SP_ASM_MUL_ADD_NO(l, h, a->dp[5], b->dp[5]);
9672
    r->dp[10] = l;
9673
    r->dp[11] = h;
9674
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit)); /* store buffered low half */
9675
    r->used = 12;
9676
    sp_clamp(r);
9677
9678
    return MP_OKAY;
9679
}
9680
#endif /* SQR_MUL_ASM */
9681
#endif /* SP_WORD_SIZE == 64 */
9682
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
9683
#ifdef SQR_MUL_ASM
9684
/* Multiply a by b and store in r: r = a * b
9685
 *
9686
 * Fully unrolled Comba implementation for 8-digit operands.
9687
 *
9688
 * @param  [in]   a  SP integer to multiply; digits dp[0..7] are read.
9689
 * @param  [in]   b  SP integer to multiply; digits dp[0..7] are read.
9690
 * @param  [out]  r  SP integer result; digits dp[0..15] are written.
9691
 *
9692
 * @return  MP_OKAY always.
9693
 *          NOTE: nothing is allocated here, so MP_MEM is never returned.
9694
 */
9695
static int _sp_mul_8(const sp_int* a, const sp_int* b, sp_int* r)
9696
{
9697
    sp_int_digit l = 0;
9698
    sp_int_digit h = 0;
9699
    sp_int_digit o = 0;
9700
    sp_int_digit t[8]; /* holds result digits 0..7 until the final copy */
9701
9702
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
9703
    t[0] = h; /* NOTE(review): h seems to hold the low digit here; confirm */
9704
    h = 0;
9705
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
9706
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
9707
    t[1] = l;
9708
    l = h; /* column stored: move the accumulator down by one digit */
9709
    h = o;
9710
    o = 0;
9711
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
9712
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
9713
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
9714
    t[2] = l;
9715
    l = h;
9716
    h = o;
9717
    o = 0;
9718
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
9719
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
9720
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
9721
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
9722
    t[3] = l;
9723
    l = h;
9724
    h = o;
9725
    o = 0;
9726
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
9727
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
9728
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
9729
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
9730
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
9731
    t[4] = l;
9732
    l = h;
9733
    h = o;
9734
    o = 0;
9735
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
9736
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
9737
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
9738
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
9739
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
9740
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
9741
    t[5] = l;
9742
    l = h;
9743
    h = o;
9744
    o = 0;
9745
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
9746
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
9747
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
9748
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
9749
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
9750
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
9751
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
9752
    t[6] = l;
9753
    l = h;
9754
    h = o;
9755
    o = 0;
9756
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
9757
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
9758
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
9759
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
9760
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
9761
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
9762
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
9763
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
9764
    t[7] = l;
9765
    l = h;
9766
    h = o;
9767
    o = 0;
9768
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
9769
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
9770
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
9771
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
9772
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
9773
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
9774
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
9775
    r->dp[8] = l; /* index >= 8: never read from a or b in this function */
9776
    l = h;
9777
    h = o;
9778
    o = 0;
9779
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
9780
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
9781
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
9782
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
9783
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
9784
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
9785
    r->dp[9] = l;
9786
    l = h;
9787
    h = o;
9788
    o = 0;
9789
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
9790
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
9791
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
9792
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
9793
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
9794
    r->dp[10] = l;
9795
    l = h;
9796
    h = o;
9797
    o = 0;
9798
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
9799
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
9800
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
9801
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
9802
    r->dp[11] = l;
9803
    l = h;
9804
    h = o;
9805
    o = 0;
9806
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
9807
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
9808
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
9809
    r->dp[12] = l;
9810
    l = h;
9811
    h = o;
9812
    o = 0;
9813
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
9814
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
9815
    r->dp[13] = l;
9816
    l = h;
9817
    h = o;
9818
    SP_ASM_MUL_ADD_NO(l, h, a->dp[7], b->dp[7]);
9819
    r->dp[14] = l;
9820
    r->dp[15] = h;
9821
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit)); /* store buffered low half */
9822
    r->used = 16;
9823
    sp_clamp(r);
9824
9825
    return MP_OKAY;
9826
}
9827
#endif /* SQR_MUL_ASM */
9828
#endif /* SP_WORD_SIZE == 32 */
9829
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
9830
#ifdef SQR_MUL_ASM
9831
/* Multiply a by b and store in r: r = a * b
9832
 *
9833
 * Fully unrolled Comba implementation for 12-digit operands.
9834
 *
9835
 * @param  [in]   a  SP integer to multiply; digits dp[0..11] are read.
9836
 * @param  [in]   b  SP integer to multiply; digits dp[0..11] are read.
9837
 * @param  [out]  r  SP integer result; digits dp[0..23] are written.
9838
 *
9839
 * @return  MP_OKAY always.
9840
 *          NOTE: nothing is allocated here, so MP_MEM is never returned.
9841
 */
9842
static int _sp_mul_12(const sp_int* a, const sp_int* b, sp_int* r)
9843
{
9844
    sp_int_digit l = 0;
9845
    sp_int_digit h = 0;
9846
    sp_int_digit o = 0;
9847
    sp_int_digit t[12]; /* holds result digits 0..11 until the final copy */
9848
9849
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
9850
    t[0] = h; /* NOTE(review): h seems to hold the low digit here; confirm */
9851
    h = 0;
9852
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
9853
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
9854
    t[1] = l;
9855
    l = h; /* column stored: move the accumulator down by one digit */
9856
    h = o;
9857
    o = 0;
9858
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
9859
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
9860
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
9861
    t[2] = l;
9862
    l = h;
9863
    h = o;
9864
    o = 0;
9865
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
9866
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
9867
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
9868
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
9869
    t[3] = l;
9870
    l = h;
9871
    h = o;
9872
    o = 0;
9873
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
9874
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
9875
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
9876
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
9877
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
9878
    t[4] = l;
9879
    l = h;
9880
    h = o;
9881
    o = 0;
9882
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
9883
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
9884
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
9885
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
9886
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
9887
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
9888
    t[5] = l;
9889
    l = h;
9890
    h = o;
9891
    o = 0;
9892
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
9893
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
9894
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
9895
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
9896
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
9897
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
9898
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
9899
    t[6] = l;
9900
    l = h;
9901
    h = o;
9902
    o = 0;
9903
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
9904
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
9905
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
9906
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
9907
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
9908
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
9909
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
9910
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
9911
    t[7] = l;
9912
    l = h;
9913
    h = o;
9914
    o = 0;
9915
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
9916
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
9917
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
9918
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
9919
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
9920
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
9921
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
9922
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
9923
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
9924
    t[8] = l;
9925
    l = h;
9926
    h = o;
9927
    o = 0;
9928
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
9929
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
9930
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
9931
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
9932
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
9933
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
9934
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
9935
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
9936
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
9937
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
9938
    t[9] = l;
9939
    l = h;
9940
    h = o;
9941
    o = 0;
9942
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
9943
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
9944
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
9945
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
9946
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
9947
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
9948
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
9949
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
9950
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
9951
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
9952
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
9953
    t[10] = l;
9954
    l = h;
9955
    h = o;
9956
    o = 0;
9957
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
9958
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
9959
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
9960
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
9961
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
9962
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
9963
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
9964
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
9965
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
9966
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
9967
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
9968
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
9969
    t[11] = l;
9970
    l = h;
9971
    h = o;
9972
    o = 0;
9973
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
9974
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
9975
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
9976
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
9977
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
9978
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
9979
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
9980
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
9981
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
9982
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
9983
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
9984
    r->dp[12] = l; /* index >= 12: never read from a or b in this function */
9985
    l = h;
9986
    h = o;
9987
    o = 0;
9988
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
9989
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
9990
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
9991
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
9992
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
9993
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
9994
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
9995
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
9996
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
9997
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
9998
    r->dp[13] = l;
9999
    l = h;
10000
    h = o;
10001
    o = 0;
10002
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
10003
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
10004
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
10005
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
10006
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
10007
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
10008
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
10009
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
10010
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
10011
    r->dp[14] = l;
10012
    l = h;
10013
    h = o;
10014
    o = 0;
10015
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
10016
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
10017
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
10018
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
10019
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
10020
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
10021
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
10022
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
10023
    r->dp[15] = l;
10024
    l = h;
10025
    h = o;
10026
    o = 0;
10027
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
10028
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
10029
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
10030
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
10031
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
10032
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
10033
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
10034
    r->dp[16] = l;
10035
    l = h;
10036
    h = o;
10037
    o = 0;
10038
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
10039
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
10040
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
10041
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
10042
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
10043
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
10044
    r->dp[17] = l;
10045
    l = h;
10046
    h = o;
10047
    o = 0;
10048
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
10049
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
10050
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
10051
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
10052
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
10053
    r->dp[18] = l;
10054
    l = h;
10055
    h = o;
10056
    o = 0;
10057
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
10058
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
10059
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
10060
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
10061
    r->dp[19] = l;
10062
    l = h;
10063
    h = o;
10064
    o = 0;
10065
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
10066
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
10067
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
10068
    r->dp[20] = l;
10069
    l = h;
10070
    h = o;
10071
    o = 0;
10072
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
10073
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
10074
    r->dp[21] = l;
10075
    l = h;
10076
    h = o;
10077
    SP_ASM_MUL_ADD_NO(l, h, a->dp[11], b->dp[11]);
10078
    r->dp[22] = l;
10079
    r->dp[23] = h;
10080
    XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit)); /* store buffered low half */
10081
    r->used = 24;
10082
    sp_clamp(r);
10083
10084
    return MP_OKAY;
10085
}
10086
#endif /* SQR_MUL_ASM */
10087
#endif /* SP_WORD_SIZE == 32 */
10088
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
10089
10090
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
10091
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
10092
    (SP_WORD_SIZE == 64)))
10093
    #if SP_INT_DIGITS >= 32
10094
/* Multiply a by b and store in r: r = a * b
10095
 *
10096
 * Comba implementation.
10097
 *
10098
 * @param  [in]   a  SP integer to multiply.
10099
 * @param  [in]   b  SP integer to multiply.
10100
 * @param  [out]  r  SP integer result.
10101
 *
10102
 * @return  MP_OKAY on success.
10103
 * @return  MP_MEM when dynamic memory allocation fails.
10104
 */
10105
static int _sp_mul_16(const sp_int* a, const sp_int* b, sp_int* r)
10106
{
10107
    int err = MP_OKAY;
10108
    sp_int_digit l = 0;
10109
    sp_int_digit h = 0;
10110
    sp_int_digit o = 0;
10111
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10112
    sp_int_digit* t = NULL;
10113
#else
10114
    sp_int_digit t[16];
10115
#endif
10116
10117
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10118
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
10119
         DYNAMIC_TYPE_BIGINT);
10120
     if (t == NULL) {
10121
         err = MP_MEM;
10122
     }
10123
#endif
10124
    if (err == MP_OKAY) {
10125
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
10126
        t[0] = h;
10127
        h = 0;
10128
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
10129
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
10130
        t[1] = l;
10131
        l = h;
10132
        h = o;
10133
        o = 0;
10134
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
10135
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
10136
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
10137
        t[2] = l;
10138
        l = h;
10139
        h = o;
10140
        o = 0;
10141
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
10142
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
10143
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
10144
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
10145
        t[3] = l;
10146
        l = h;
10147
        h = o;
10148
        o = 0;
10149
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
10150
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
10151
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
10152
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
10153
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
10154
        t[4] = l;
10155
        l = h;
10156
        h = o;
10157
        o = 0;
10158
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
10159
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
10160
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
10161
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
10162
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
10163
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
10164
        t[5] = l;
10165
        l = h;
10166
        h = o;
10167
        o = 0;
10168
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
10169
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
10170
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
10171
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
10172
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
10173
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
10174
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
10175
        t[6] = l;
10176
        l = h;
10177
        h = o;
10178
        o = 0;
10179
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
10180
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
10181
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
10182
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
10183
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
10184
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
10185
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
10186
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
10187
        t[7] = l;
10188
        l = h;
10189
        h = o;
10190
        o = 0;
10191
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
10192
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
10193
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
10194
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
10195
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
10196
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
10197
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
10198
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
10199
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
10200
        t[8] = l;
10201
        l = h;
10202
        h = o;
10203
        o = 0;
10204
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
10205
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
10206
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
10207
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
10208
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
10209
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
10210
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
10211
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
10212
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
10213
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
10214
        t[9] = l;
10215
        l = h;
10216
        h = o;
10217
        o = 0;
10218
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
10219
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
10220
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
10221
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
10222
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
10223
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
10224
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
10225
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
10226
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
10227
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
10228
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
10229
        t[10] = l;
10230
        l = h;
10231
        h = o;
10232
        o = 0;
10233
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
10234
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
10235
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
10236
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
10237
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
10238
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
10239
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
10240
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
10241
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
10242
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
10243
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
10244
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
10245
        t[11] = l;
10246
        l = h;
10247
        h = o;
10248
        o = 0;
10249
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
10250
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
10251
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
10252
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
10253
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
10254
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
10255
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
10256
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
10257
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
10258
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
10259
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
10260
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
10261
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
10262
        t[12] = l;
10263
        l = h;
10264
        h = o;
10265
        o = 0;
10266
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
10267
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
10268
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
10269
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
10270
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
10271
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
10272
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
10273
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
10274
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
10275
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
10276
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
10277
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
10278
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
10279
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
10280
        t[13] = l;
10281
        l = h;
10282
        h = o;
10283
        o = 0;
10284
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
10285
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
10286
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
10287
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
10288
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
10289
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
10290
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
10291
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
10292
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
10293
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
10294
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
10295
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
10296
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
10297
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
10298
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
10299
        t[14] = l;
10300
        l = h;
10301
        h = o;
10302
        o = 0;
10303
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
10304
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
10305
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
10306
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
10307
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
10308
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
10309
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
10310
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
10311
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
10312
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
10313
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
10314
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
10315
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
10316
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
10317
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
10318
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
10319
        t[15] = l;
10320
        l = h;
10321
        h = o;
10322
        o = 0;
10323
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
10324
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
10325
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
10326
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
10327
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
10328
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
10329
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
10330
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
10331
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
10332
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
10333
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
10334
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
10335
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
10336
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
10337
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
10338
        r->dp[16] = l;
10339
        l = h;
10340
        h = o;
10341
        o = 0;
10342
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
10343
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
10344
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
10345
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
10346
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
10347
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
10348
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
10349
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
10350
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
10351
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
10352
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
10353
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
10354
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
10355
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
10356
        r->dp[17] = l;
10357
        l = h;
10358
        h = o;
10359
        o = 0;
10360
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
10361
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
10362
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
10363
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
10364
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
10365
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
10366
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
10367
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
10368
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
10369
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
10370
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
10371
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
10372
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
10373
        r->dp[18] = l;
10374
        l = h;
10375
        h = o;
10376
        o = 0;
10377
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
10378
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
10379
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
10380
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
10381
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
10382
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
10383
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
10384
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
10385
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
10386
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
10387
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
10388
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
10389
        r->dp[19] = l;
10390
        l = h;
10391
        h = o;
10392
        o = 0;
10393
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
10394
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
10395
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
10396
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
10397
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
10398
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
10399
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
10400
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
10401
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
10402
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
10403
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
10404
        r->dp[20] = l;
10405
        l = h;
10406
        h = o;
10407
        o = 0;
10408
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
10409
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
10410
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
10411
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
10412
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
10413
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
10414
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
10415
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
10416
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
10417
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
10418
        r->dp[21] = l;
10419
        l = h;
10420
        h = o;
10421
        o = 0;
10422
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
10423
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
10424
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
10425
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
10426
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
10427
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
10428
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
10429
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
10430
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
10431
        r->dp[22] = l;
10432
        l = h;
10433
        h = o;
10434
        o = 0;
10435
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
10436
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
10437
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
10438
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
10439
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
10440
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
10441
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
10442
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
10443
        r->dp[23] = l;
10444
        l = h;
10445
        h = o;
10446
        o = 0;
10447
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
10448
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
10449
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
10450
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
10451
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
10452
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
10453
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
10454
        r->dp[24] = l;
10455
        l = h;
10456
        h = o;
10457
        o = 0;
10458
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
10459
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
10460
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
10461
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
10462
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
10463
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
10464
        r->dp[25] = l;
10465
        l = h;
10466
        h = o;
10467
        o = 0;
10468
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
10469
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
10470
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
10471
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
10472
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
10473
        r->dp[26] = l;
10474
        l = h;
10475
        h = o;
10476
        o = 0;
10477
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
10478
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
10479
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
10480
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
10481
        r->dp[27] = l;
10482
        l = h;
10483
        h = o;
10484
        o = 0;
10485
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
10486
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
10487
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
10488
        r->dp[28] = l;
10489
        l = h;
10490
        h = o;
10491
        o = 0;
10492
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
10493
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
10494
        r->dp[29] = l;
10495
        l = h;
10496
        h = o;
10497
        SP_ASM_MUL_ADD_NO(l, h, a->dp[15], b->dp[15]);
10498
        r->dp[30] = l;
10499
        r->dp[31] = h;
10500
        XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
10501
        r->used = 32;
10502
        sp_clamp(r);
10503
    }
10504
10505
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10506
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
10507
#endif
10508
    return err;
10509
}
10510
    #endif /* SP_INT_DIGITS >= 32 */
10511
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
10512
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64) */
10513
10514
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
10515
    #if SP_INT_DIGITS >= 48
10516
/* Multiply a by b and store in r: r = a * b
10517
 *
10518
 * Comba implementation.
10519
 *
10520
 * @param  [in]   a  SP integer to multiply.
10521
 * @param  [in]   b  SP integer to multiply.
10522
 * @param  [out]  r  SP integer result.
10523
 *
10524
 * @return  MP_OKAY on success.
10525
 * @return  MP_MEM when dynamic memory allocation fails.
10526
 */
10527
static int _sp_mul_24(const sp_int* a, const sp_int* b, sp_int* r)
10528
{
10529
    int err = MP_OKAY;
10530
    sp_int_digit l = 0;
10531
    sp_int_digit h = 0;
10532
    sp_int_digit o = 0;
10533
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10534
    sp_int_digit* t = NULL;
10535
#else
10536
    sp_int_digit t[24];
10537
#endif
10538
10539
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10540
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
10541
         DYNAMIC_TYPE_BIGINT);
10542
     if (t == NULL) {
10543
         err = MP_MEM;
10544
     }
10545
#endif
10546
    if (err == MP_OKAY) {
10547
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
10548
        t[0] = h;
10549
        h = 0;
10550
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
10551
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
10552
        t[1] = l;
10553
        l = h;
10554
        h = o;
10555
        o = 0;
10556
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
10557
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
10558
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
10559
        t[2] = l;
10560
        l = h;
10561
        h = o;
10562
        o = 0;
10563
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
10564
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
10565
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
10566
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
10567
        t[3] = l;
10568
        l = h;
10569
        h = o;
10570
        o = 0;
10571
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
10572
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
10573
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
10574
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
10575
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
10576
        t[4] = l;
10577
        l = h;
10578
        h = o;
10579
        o = 0;
10580
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
10581
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
10582
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
10583
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
10584
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
10585
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
10586
        t[5] = l;
10587
        l = h;
10588
        h = o;
10589
        o = 0;
10590
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
10591
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
10592
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
10593
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
10594
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
10595
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
10596
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
10597
        t[6] = l;
10598
        l = h;
10599
        h = o;
10600
        o = 0;
10601
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
10602
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
10603
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
10604
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
10605
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
10606
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
10607
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
10608
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
10609
        t[7] = l;
10610
        l = h;
10611
        h = o;
10612
        o = 0;
10613
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
10614
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
10615
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
10616
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
10617
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
10618
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
10619
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
10620
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
10621
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
10622
        t[8] = l;
10623
        l = h;
10624
        h = o;
10625
        o = 0;
10626
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
10627
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
10628
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
10629
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
10630
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
10631
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
10632
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
10633
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
10634
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
10635
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
10636
        t[9] = l;
10637
        l = h;
10638
        h = o;
10639
        o = 0;
10640
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
10641
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
10642
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
10643
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
10644
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
10645
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
10646
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
10647
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
10648
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
10649
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
10650
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
10651
        t[10] = l;
10652
        l = h;
10653
        h = o;
10654
        o = 0;
10655
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
10656
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
10657
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
10658
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
10659
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
10660
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
10661
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
10662
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
10663
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
10664
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
10665
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
10666
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
10667
        t[11] = l;
10668
        l = h;
10669
        h = o;
10670
        o = 0;
10671
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
10672
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
10673
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
10674
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
10675
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
10676
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
10677
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
10678
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
10679
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
10680
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
10681
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
10682
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
10683
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
10684
        t[12] = l;
10685
        l = h;
10686
        h = o;
10687
        o = 0;
10688
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
10689
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
10690
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
10691
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
10692
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
10693
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
10694
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
10695
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
10696
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
10697
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
10698
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
10699
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
10700
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
10701
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
10702
        t[13] = l;
10703
        l = h;
10704
        h = o;
10705
        o = 0;
10706
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
10707
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
10708
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
10709
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
10710
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
10711
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
10712
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
10713
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
10714
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
10715
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
10716
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
10717
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
10718
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
10719
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
10720
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
10721
        t[14] = l;
10722
        l = h;
10723
        h = o;
10724
        o = 0;
10725
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
10726
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
10727
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
10728
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
10729
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
10730
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
10731
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
10732
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
10733
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
10734
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
10735
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
10736
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
10737
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
10738
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
10739
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
10740
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
10741
        t[15] = l;
10742
        l = h;
10743
        h = o;
10744
        o = 0;
10745
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[16]);
10746
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
10747
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
10748
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
10749
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
10750
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
10751
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
10752
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
10753
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
10754
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
10755
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
10756
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
10757
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
10758
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
10759
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
10760
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
10761
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[0]);
10762
        t[16] = l;
10763
        l = h;
10764
        h = o;
10765
        o = 0;
10766
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[17]);
10767
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[16]);
10768
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
10769
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
10770
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
10771
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
10772
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
10773
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
10774
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
10775
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
10776
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
10777
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
10778
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
10779
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
10780
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
10781
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
10782
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[1]);
10783
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[0]);
10784
        t[17] = l;
10785
        l = h;
10786
        h = o;
10787
        o = 0;
10788
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[18]);
10789
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[17]);
10790
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[16]);
10791
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
10792
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
10793
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
10794
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
10795
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
10796
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
10797
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
10798
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
10799
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
10800
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
10801
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
10802
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
10803
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
10804
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[2]);
10805
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[1]);
10806
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[0]);
10807
        t[18] = l;
10808
        l = h;
10809
        h = o;
10810
        o = 0;
10811
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[19]);
10812
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[18]);
10813
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[17]);
10814
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[16]);
10815
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
10816
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
10817
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
10818
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
10819
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
10820
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
10821
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
10822
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
10823
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
10824
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
10825
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
10826
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
10827
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[3]);
10828
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[2]);
10829
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[1]);
10830
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[0]);
10831
        t[19] = l;
10832
        l = h;
10833
        h = o;
10834
        o = 0;
10835
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[20]);
10836
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[19]);
10837
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[18]);
10838
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[17]);
10839
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[16]);
10840
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
10841
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
10842
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
10843
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
10844
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
10845
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
10846
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
10847
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
10848
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
10849
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
10850
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
10851
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[4]);
10852
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[3]);
10853
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[2]);
10854
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[1]);
10855
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[0]);
10856
        t[20] = l;
10857
        l = h;
10858
        h = o;
10859
        o = 0;
10860
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[21]);
10861
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[20]);
10862
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[19]);
10863
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[18]);
10864
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[17]);
10865
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[16]);
10866
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
10867
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
10868
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
10869
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
10870
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
10871
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
10872
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
10873
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
10874
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
10875
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
10876
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[5]);
10877
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[4]);
10878
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[3]);
10879
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[2]);
10880
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[1]);
10881
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[0]);
10882
        t[21] = l;
10883
        l = h;
10884
        h = o;
10885
        o = 0;
10886
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[22]);
10887
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[21]);
10888
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[20]);
10889
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[19]);
10890
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[18]);
10891
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[17]);
10892
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[16]);
10893
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
10894
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
10895
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
10896
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
10897
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
10898
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
10899
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
10900
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
10901
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
10902
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[6]);
10903
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[5]);
10904
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[4]);
10905
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[3]);
10906
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[2]);
10907
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[1]);
10908
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[0]);
10909
        t[22] = l;
10910
        l = h;
10911
        h = o;
10912
        o = 0;
10913
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[23]);
10914
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[22]);
10915
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[21]);
10916
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[20]);
10917
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[19]);
10918
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[18]);
10919
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[17]);
10920
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[16]);
10921
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
10922
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
10923
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
10924
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
10925
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
10926
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
10927
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
10928
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
10929
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[7]);
10930
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[6]);
10931
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[5]);
10932
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[4]);
10933
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[3]);
10934
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[2]);
10935
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[1]);
10936
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[0]);
10937
        t[23] = l;
10938
        l = h;
10939
        h = o;
10940
        o = 0;
10941
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[23]);
10942
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[22]);
10943
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[21]);
10944
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[20]);
10945
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[19]);
10946
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[18]);
10947
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[17]);
10948
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[16]);
10949
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
10950
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
10951
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
10952
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
10953
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
10954
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
10955
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
10956
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[8]);
10957
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[7]);
10958
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[6]);
10959
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[5]);
10960
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[4]);
10961
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[3]);
10962
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[2]);
10963
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[1]);
10964
        r->dp[24] = l;
10965
        l = h;
10966
        h = o;
10967
        o = 0;
10968
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[23]);
10969
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[22]);
10970
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[21]);
10971
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[20]);
10972
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[19]);
10973
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[18]);
10974
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[17]);
10975
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[16]);
10976
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
10977
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
10978
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
10979
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
10980
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
10981
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
10982
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[9]);
10983
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[8]);
10984
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[7]);
10985
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[6]);
10986
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[5]);
10987
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[4]);
10988
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[3]);
10989
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[2]);
10990
        r->dp[25] = l;
10991
        l = h;
10992
        h = o;
10993
        o = 0;
10994
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[23]);
10995
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[22]);
10996
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[21]);
10997
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[20]);
10998
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[19]);
10999
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[18]);
11000
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[17]);
11001
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[16]);
11002
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
11003
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
11004
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
11005
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
11006
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
11007
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[10]);
11008
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[9]);
11009
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[8]);
11010
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[7]);
11011
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[6]);
11012
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[5]);
11013
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[4]);
11014
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[3]);
11015
        r->dp[26] = l;
11016
        l = h;
11017
        h = o;
11018
        o = 0;
11019
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[23]);
11020
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[22]);
11021
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[21]);
11022
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[20]);
11023
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[19]);
11024
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[18]);
11025
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[17]);
11026
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[16]);
11027
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
11028
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
11029
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
11030
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
11031
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[11]);
11032
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[10]);
11033
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[9]);
11034
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[8]);
11035
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[7]);
11036
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[6]);
11037
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[5]);
11038
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[4]);
11039
        r->dp[27] = l;
11040
        l = h;
11041
        h = o;
11042
        o = 0;
11043
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[23]);
11044
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[22]);
11045
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[21]);
11046
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[20]);
11047
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[19]);
11048
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[18]);
11049
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[17]);
11050
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[16]);
11051
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
11052
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
11053
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
11054
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[12]);
11055
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[11]);
11056
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[10]);
11057
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[9]);
11058
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[8]);
11059
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[7]);
11060
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[6]);
11061
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[5]);
11062
        r->dp[28] = l;
11063
        l = h;
11064
        h = o;
11065
        o = 0;
11066
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[23]);
11067
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[22]);
11068
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[21]);
11069
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[20]);
11070
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[19]);
11071
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[18]);
11072
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[17]);
11073
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[16]);
11074
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
11075
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
11076
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[13]);
11077
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[12]);
11078
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[11]);
11079
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[10]);
11080
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[9]);
11081
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[8]);
11082
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[7]);
11083
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[6]);
11084
        r->dp[29] = l;
11085
        l = h;
11086
        h = o;
11087
        o = 0;
11088
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[23]);
11089
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[22]);
11090
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[21]);
11091
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[20]);
11092
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[19]);
11093
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[18]);
11094
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[17]);
11095
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[16]);
11096
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[15]);
11097
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[14]);
11098
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[13]);
11099
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[12]);
11100
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[11]);
11101
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[10]);
11102
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[9]);
11103
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[8]);
11104
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[7]);
11105
        r->dp[30] = l;
11106
        l = h;
11107
        h = o;
11108
        o = 0;
11109
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[23]);
11110
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[22]);
11111
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[21]);
11112
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[20]);
11113
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[19]);
11114
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[18]);
11115
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[17]);
11116
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[16]);
11117
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[15]);
11118
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[14]);
11119
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[13]);
11120
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[12]);
11121
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[11]);
11122
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[10]);
11123
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[9]);
11124
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[8]);
11125
        r->dp[31] = l;
11126
        l = h;
11127
        h = o;
11128
        o = 0;
11129
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[23]);
11130
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[22]);
11131
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[21]);
11132
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[20]);
11133
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[19]);
11134
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[18]);
11135
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[17]);
11136
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[16]);
11137
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[15]);
11138
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[14]);
11139
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[13]);
11140
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[12]);
11141
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[11]);
11142
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[10]);
11143
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[9]);
11144
        r->dp[32] = l;
11145
        l = h;
11146
        h = o;
11147
        o = 0;
11148
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[23]);
11149
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[22]);
11150
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[21]);
11151
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[20]);
11152
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[19]);
11153
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[18]);
11154
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[17]);
11155
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[16]);
11156
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[15]);
11157
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[14]);
11158
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[13]);
11159
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[12]);
11160
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[11]);
11161
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[10]);
11162
        r->dp[33] = l;
11163
        l = h;
11164
        h = o;
11165
        o = 0;
11166
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[23]);
11167
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[22]);
11168
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[21]);
11169
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[20]);
11170
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[19]);
11171
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[18]);
11172
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[17]);
11173
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[16]);
11174
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[15]);
11175
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[14]);
11176
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[13]);
11177
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[12]);
11178
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[11]);
11179
        r->dp[34] = l;
11180
        l = h;
11181
        h = o;
11182
        o = 0;
11183
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[23]);
11184
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[22]);
11185
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[21]);
11186
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[20]);
11187
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[19]);
11188
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[18]);
11189
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[17]);
11190
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[16]);
11191
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[15]);
11192
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[14]);
11193
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[13]);
11194
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[12]);
11195
        r->dp[35] = l;
11196
        l = h;
11197
        h = o;
11198
        o = 0;
11199
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[23]);
11200
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[22]);
11201
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[21]);
11202
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[20]);
11203
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[19]);
11204
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[18]);
11205
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[17]);
11206
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[16]);
11207
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[15]);
11208
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[14]);
11209
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[13]);
11210
        r->dp[36] = l;
11211
        l = h;
11212
        h = o;
11213
        o = 0;
11214
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[23]);
11215
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[22]);
11216
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[21]);
11217
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[20]);
11218
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[19]);
11219
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[18]);
11220
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[17]);
11221
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[16]);
11222
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[15]);
11223
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[14]);
11224
        r->dp[37] = l;
11225
        l = h;
11226
        h = o;
11227
        o = 0;
11228
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[23]);
11229
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[22]);
11230
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[21]);
11231
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[20]);
11232
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[19]);
11233
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[18]);
11234
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[17]);
11235
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[16]);
11236
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[15]);
11237
        r->dp[38] = l;
11238
        l = h;
11239
        h = o;
11240
        o = 0;
11241
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[23]);
11242
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[22]);
11243
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[21]);
11244
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[20]);
11245
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[19]);
11246
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[18]);
11247
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[17]);
11248
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[16]);
11249
        r->dp[39] = l;
11250
        l = h;
11251
        h = o;
11252
        o = 0;
11253
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[23]);
11254
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[22]);
11255
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[21]);
11256
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[20]);
11257
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[19]);
11258
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[18]);
11259
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[17]);
11260
        r->dp[40] = l;
11261
        l = h;
11262
        h = o;
11263
        o = 0;
11264
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[23]);
11265
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[22]);
11266
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[21]);
11267
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[20]);
11268
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[19]);
11269
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[18]);
11270
        r->dp[41] = l;
11271
        l = h;
11272
        h = o;
11273
        o = 0;
11274
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[23]);
11275
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[22]);
11276
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[21]);
11277
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[20]);
11278
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[19]);
11279
        r->dp[42] = l;
11280
        l = h;
11281
        h = o;
11282
        o = 0;
11283
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[23]);
11284
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[22]);
11285
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[21]);
11286
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[20]);
11287
        r->dp[43] = l;
11288
        l = h;
11289
        h = o;
11290
        o = 0;
11291
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[23]);
11292
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[22]);
11293
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[21]);
11294
        r->dp[44] = l;
11295
        l = h;
11296
        h = o;
11297
        o = 0;
11298
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[23]);
11299
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[22]);
11300
        r->dp[45] = l;
11301
        l = h;
11302
        h = o;
11303
        SP_ASM_MUL_ADD_NO(l, h, a->dp[23], b->dp[23]);
11304
        r->dp[46] = l;
11305
        r->dp[47] = h;
11306
        XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
11307
        r->used = 48;
11308
        sp_clamp(r);
11309
    }
11310
11311
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
11312
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
11313
#endif
11314
    return err;
11315
}
11316
    #endif /* SP_INT_DIGITS >= 48 */
11317
11318
    #if SP_INT_DIGITS >= 64
11319
/* Multiply a by b and store in r: r = a * b
11320
 *
11321
 * Karatsuba implementation.
11322
 *
11323
 * @param  [in]   a  SP integer to multiply.
11324
 * @param  [in]   b  SP integer to multiply.
11325
 * @param  [out]  r  SP integer result.
11326
 *
11327
 * @return  MP_OKAY on success.
11328
 * @return  MP_MEM when dynamic memory allocation fails.
11329
 */
11330
static int _sp_mul_32(const sp_int* a, const sp_int* b, sp_int* r)
11331
{
11332
    int err = MP_OKAY;
11333
    unsigned int i;
11334
    sp_int_digit l;
11335
    sp_int_digit h;
11336
    sp_int* a1;
11337
    sp_int* b1;
11338
    sp_int* z0;
11339
    sp_int* z1;
11340
    sp_int* z2;
11341
    sp_int_digit ca;
11342
    sp_int_digit cb;
11343
    DECL_SP_INT_ARRAY(t, 16, 2);
11344
    DECL_SP_INT_ARRAY(z, 33, 2);
11345
11346
    ALLOC_SP_INT_ARRAY(t, 16, 2, err, NULL);
11347
    ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
11348
    if (err == MP_OKAY) {
11349
        a1 = t[0];
11350
        b1 = t[1];
11351
        z1 = z[0];
11352
        z2 = z[1];
11353
        z0 = r;
11354
11355
        XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
11356
        a1->used = 16;
11357
        XMEMCPY(b1->dp, &b->dp[16], sizeof(sp_int_digit) * 16);
11358
        b1->used = 16;
11359
11360
        /* z2 = a1 * b1 */
11361
        err = _sp_mul_16(a1, b1, z2);
11362
    }
11363
    if (err == MP_OKAY) {
11364
        l = a1->dp[0];
11365
        h = 0;
11366
        SP_ASM_ADDC(l, h, a->dp[0]);
11367
        a1->dp[0] = l;
11368
        l = h;
11369
        h = 0;
11370
        for (i = 1; i < 16; i++) {
11371
            SP_ASM_ADDC(l, h, a1->dp[i]);
11372
            SP_ASM_ADDC(l, h, a->dp[i]);
11373
            a1->dp[i] = l;
11374
            l = h;
11375
            h = 0;
11376
        }
11377
        ca = l;
11378
        /* b01 = b0 + b1 */
11379
        l = b1->dp[0];
11380
        h = 0;
11381
        SP_ASM_ADDC(l, h, b->dp[0]);
11382
        b1->dp[0] = l;
11383
        l = h;
11384
        h = 0;
11385
        for (i = 1; i < 16; i++) {
11386
            SP_ASM_ADDC(l, h, b1->dp[i]);
11387
            SP_ASM_ADDC(l, h, b->dp[i]);
11388
            b1->dp[i] = l;
11389
            l = h;
11390
            h = 0;
11391
        }
11392
        cb = l;
11393
11394
        /* z0 = a0 * b0 */
11395
        err = _sp_mul_16(a, b, z0);
11396
    }
11397
    if (err == MP_OKAY) {
11398
        /* z1 = (a0 + a1) * (b0 + b1) */
11399
        err = _sp_mul_16(a1, b1, z1);
11400
    }
11401
    if (err == MP_OKAY) {
11402
        /* r = (z2 << 32) + (z1 - z0 - z2) << 16) + z0 */
11403
        /* r = z0 */
11404
        /* r += (z1 - z0 - z2) << 16 */
11405
        z1->dp[32] = ca & cb;
11406
        l = 0;
11407
        if (ca) {
11408
            h = 0;
11409
            for (i = 0; i < 16; i++) {
11410
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
11411
                SP_ASM_ADDC(l, h, b1->dp[i]);
11412
                z1->dp[i + 16] = l;
11413
                l = h;
11414
                h = 0;
11415
            }
11416
        }
11417
        z1->dp[32] += l;
11418
        l = 0;
11419
        if (cb) {
11420
            h = 0;
11421
            for (i = 0; i < 16; i++) {
11422
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
11423
                SP_ASM_ADDC(l, h, a1->dp[i]);
11424
                z1->dp[i + 16] = l;
11425
                l = h;
11426
                h = 0;
11427
            }
11428
        }
11429
        z1->dp[32] += l;
11430
        /* z1 = z1 - z0 - z1 */
11431
        l = 0;
11432
        h = 0;
11433
        for (i = 0; i < 32; i++) {
11434
            l += z1->dp[i];
11435
            SP_ASM_SUBB(l, h, z0->dp[i]);
11436
            SP_ASM_SUBB(l, h, z2->dp[i]);
11437
            z1->dp[i] = l;
11438
            l = h;
11439
            h = 0;
11440
        }
11441
        z1->dp[i] += l;
11442
        /* r += z1 << 16 */
11443
        l = 0;
11444
        h = 0;
11445
        for (i = 0; i < 16; i++) {
11446
            SP_ASM_ADDC(l, h, r->dp[i + 16]);
11447
            SP_ASM_ADDC(l, h, z1->dp[i]);
11448
            r->dp[i + 16] = l;
11449
            l = h;
11450
            h = 0;
11451
        }
11452
        for (; i < 33; i++) {
11453
            SP_ASM_ADDC(l, h, z1->dp[i]);
11454
            r->dp[i + 16] = l;
11455
            l = h;
11456
            h = 0;
11457
        }
11458
        /* r += z2 << 32  */
11459
        l = 0;
11460
        h = 0;
11461
        for (i = 0; i < 17; i++) {
11462
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
11463
            SP_ASM_ADDC(l, h, z2->dp[i]);
11464
            r->dp[i + 32] = l;
11465
            l = h;
11466
            h = 0;
11467
        }
11468
        for (; i < 32; i++) {
11469
            SP_ASM_ADDC(l, h, z2->dp[i]);
11470
            r->dp[i + 32] = l;
11471
            l = h;
11472
            h = 0;
11473
        }
11474
        r->used = 64;
11475
        sp_clamp(r);
11476
    }
11477
11478
    FREE_SP_INT_ARRAY(z, NULL);
11479
    FREE_SP_INT_ARRAY(t, NULL);
11480
    return err;
11481
}
11482
    #endif /* SP_INT_DIGITS >= 64 */
11483
11484
    #if SP_INT_DIGITS >= 96
11485
/* Multiply a by b and store in r: r = a * b
11486
 *
11487
 * Karatsuba implementation.
11488
 *
11489
 * @param  [in]   a  SP integer to multiply.
11490
 * @param  [in]   b  SP integer to multiply.
11491
 * @param  [out]  r  SP integer result.
11492
 *
11493
 * @return  MP_OKAY on success.
11494
 * @return  MP_MEM when dynamic memory allocation fails.
11495
 */
11496
static int _sp_mul_48(const sp_int* a, const sp_int* b, sp_int* r)
11497
{
11498
    int err = MP_OKAY;
11499
    unsigned int i;
11500
    sp_int_digit l;
11501
    sp_int_digit h;
11502
    sp_int* a1;
11503
    sp_int* b1;
11504
    sp_int* z0;
11505
    sp_int* z1;
11506
    sp_int* z2;
11507
    sp_int_digit ca;
11508
    sp_int_digit cb;
11509
    DECL_SP_INT_ARRAY(t, 24, 2);
11510
    DECL_SP_INT_ARRAY(z, 49, 2);
11511
11512
    ALLOC_SP_INT_ARRAY(t, 24, 2, err, NULL);
11513
    ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
11514
    if (err == MP_OKAY) {
11515
        a1 = t[0];
11516
        b1 = t[1];
11517
        z1 = z[0];
11518
        z2 = z[1];
11519
        z0 = r;
11520
11521
        XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
11522
        a1->used = 24;
11523
        XMEMCPY(b1->dp, &b->dp[24], sizeof(sp_int_digit) * 24);
11524
        b1->used = 24;
11525
11526
        /* z2 = a1 * b1 */
11527
        err = _sp_mul_24(a1, b1, z2);
11528
    }
11529
    if (err == MP_OKAY) {
11530
        l = a1->dp[0];
11531
        h = 0;
11532
        SP_ASM_ADDC(l, h, a->dp[0]);
11533
        a1->dp[0] = l;
11534
        l = h;
11535
        h = 0;
11536
        for (i = 1; i < 24; i++) {
11537
            SP_ASM_ADDC(l, h, a1->dp[i]);
11538
            SP_ASM_ADDC(l, h, a->dp[i]);
11539
            a1->dp[i] = l;
11540
            l = h;
11541
            h = 0;
11542
        }
11543
        ca = l;
11544
        /* b01 = b0 + b1 */
11545
        l = b1->dp[0];
11546
        h = 0;
11547
        SP_ASM_ADDC(l, h, b->dp[0]);
11548
        b1->dp[0] = l;
11549
        l = h;
11550
        h = 0;
11551
        for (i = 1; i < 24; i++) {
11552
            SP_ASM_ADDC(l, h, b1->dp[i]);
11553
            SP_ASM_ADDC(l, h, b->dp[i]);
11554
            b1->dp[i] = l;
11555
            l = h;
11556
            h = 0;
11557
        }
11558
        cb = l;
11559
11560
        /* z0 = a0 * b0 */
11561
        err = _sp_mul_24(a, b, z0);
11562
    }
11563
    if (err == MP_OKAY) {
11564
        /* z1 = (a0 + a1) * (b0 + b1) */
11565
        err = _sp_mul_24(a1, b1, z1);
11566
    }
11567
    if (err == MP_OKAY) {
11568
        /* r = (z2 << 48) + (z1 - z0 - z2) << 24) + z0 */
11569
        /* r = z0 */
11570
        /* r += (z1 - z0 - z2) << 24 */
11571
        z1->dp[48] = ca & cb;
11572
        l = 0;
11573
        if (ca) {
11574
            h = 0;
11575
            for (i = 0; i < 24; i++) {
11576
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
11577
                SP_ASM_ADDC(l, h, b1->dp[i]);
11578
                z1->dp[i + 24] = l;
11579
                l = h;
11580
                h = 0;
11581
            }
11582
        }
11583
        z1->dp[48] += l;
11584
        l = 0;
11585
        if (cb) {
11586
            h = 0;
11587
            for (i = 0; i < 24; i++) {
11588
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
11589
                SP_ASM_ADDC(l, h, a1->dp[i]);
11590
                z1->dp[i + 24] = l;
11591
                l = h;
11592
                h = 0;
11593
            }
11594
        }
11595
        z1->dp[48] += l;
11596
        /* z1 = z1 - z0 - z1 */
11597
        l = 0;
11598
        h = 0;
11599
        for (i = 0; i < 48; i++) {
11600
            l += z1->dp[i];
11601
            SP_ASM_SUBB(l, h, z0->dp[i]);
11602
            SP_ASM_SUBB(l, h, z2->dp[i]);
11603
            z1->dp[i] = l;
11604
            l = h;
11605
            h = 0;
11606
        }
11607
        z1->dp[i] += l;
11608
        /* r += z1 << 16 */
11609
        l = 0;
11610
        h = 0;
11611
        for (i = 0; i < 24; i++) {
11612
            SP_ASM_ADDC(l, h, r->dp[i + 24]);
11613
            SP_ASM_ADDC(l, h, z1->dp[i]);
11614
            r->dp[i + 24] = l;
11615
            l = h;
11616
            h = 0;
11617
        }
11618
        for (; i < 49; i++) {
11619
            SP_ASM_ADDC(l, h, z1->dp[i]);
11620
            r->dp[i + 24] = l;
11621
            l = h;
11622
            h = 0;
11623
        }
11624
        /* r += z2 << 48  */
11625
        l = 0;
11626
        h = 0;
11627
        for (i = 0; i < 25; i++) {
11628
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
11629
            SP_ASM_ADDC(l, h, z2->dp[i]);
11630
            r->dp[i + 48] = l;
11631
            l = h;
11632
            h = 0;
11633
        }
11634
        for (; i < 48; i++) {
11635
            SP_ASM_ADDC(l, h, z2->dp[i]);
11636
            r->dp[i + 48] = l;
11637
            l = h;
11638
            h = 0;
11639
        }
11640
        r->used = 96;
11641
        sp_clamp(r);
11642
    }
11643
11644
    FREE_SP_INT_ARRAY(z, NULL);
11645
    FREE_SP_INT_ARRAY(t, NULL);
11646
    return err;
11647
}
11648
    #endif /* SP_INT_DIGITS >= 96 */
11649
11650
    #if SP_INT_DIGITS >= 128
11651
/* Multiply a by b and store in r: r = a * b
11652
 *
11653
 * Karatsuba implementation.
11654
 *
11655
 * @param  [in]   a  SP integer to multiply.
11656
 * @param  [in]   b  SP integer to multiply.
11657
 * @param  [out]  r  SP integer result.
11658
 *
11659
 * @return  MP_OKAY on success.
11660
 * @return  MP_MEM when dynamic memory allocation fails.
11661
 */
11662
static int _sp_mul_64(const sp_int* a, const sp_int* b, sp_int* r)
11663
{
11664
    int err = MP_OKAY;
11665
    unsigned int i;
11666
    sp_int_digit l;
11667
    sp_int_digit h;
11668
    sp_int* a1;
11669
    sp_int* b1;
11670
    sp_int* z0;
11671
    sp_int* z1;
11672
    sp_int* z2;
11673
    sp_int_digit ca;
11674
    sp_int_digit cb;
11675
    DECL_SP_INT_ARRAY(t, 32, 2);
11676
    DECL_SP_INT_ARRAY(z, 65, 2);
11677
11678
    ALLOC_SP_INT_ARRAY(t, 32, 2, err, NULL);
11679
    ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
11680
    if (err == MP_OKAY) {
11681
        a1 = t[0];
11682
        b1 = t[1];
11683
        z1 = z[0];
11684
        z2 = z[1];
11685
        z0 = r;
11686
11687
        XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
11688
        a1->used = 32;
11689
        XMEMCPY(b1->dp, &b->dp[32], sizeof(sp_int_digit) * 32);
11690
        b1->used = 32;
11691
11692
        /* z2 = a1 * b1 */
11693
        err = _sp_mul_32(a1, b1, z2);
11694
    }
11695
    if (err == MP_OKAY) {
11696
        l = a1->dp[0];
11697
        h = 0;
11698
        SP_ASM_ADDC(l, h, a->dp[0]);
11699
        a1->dp[0] = l;
11700
        l = h;
11701
        h = 0;
11702
        for (i = 1; i < 32; i++) {
11703
            SP_ASM_ADDC(l, h, a1->dp[i]);
11704
            SP_ASM_ADDC(l, h, a->dp[i]);
11705
            a1->dp[i] = l;
11706
            l = h;
11707
            h = 0;
11708
        }
11709
        ca = l;
11710
        /* b01 = b0 + b1 */
11711
        l = b1->dp[0];
11712
        h = 0;
11713
        SP_ASM_ADDC(l, h, b->dp[0]);
11714
        b1->dp[0] = l;
11715
        l = h;
11716
        h = 0;
11717
        for (i = 1; i < 32; i++) {
11718
            SP_ASM_ADDC(l, h, b1->dp[i]);
11719
            SP_ASM_ADDC(l, h, b->dp[i]);
11720
            b1->dp[i] = l;
11721
            l = h;
11722
            h = 0;
11723
        }
11724
        cb = l;
11725
11726
        /* z0 = a0 * b0 */
11727
        err = _sp_mul_32(a, b, z0);
11728
    }
11729
    if (err == MP_OKAY) {
11730
        /* z1 = (a0 + a1) * (b0 + b1) */
11731
        err = _sp_mul_32(a1, b1, z1);
11732
    }
11733
    if (err == MP_OKAY) {
11734
        /* r = (z2 << 64) + (z1 - z0 - z2) << 32) + z0 */
11735
        /* r = z0 */
11736
        /* r += (z1 - z0 - z2) << 32 */
11737
        z1->dp[64] = ca & cb;
11738
        l = 0;
11739
        if (ca) {
11740
            h = 0;
11741
            for (i = 0; i < 32; i++) {
11742
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
11743
                SP_ASM_ADDC(l, h, b1->dp[i]);
11744
                z1->dp[i + 32] = l;
11745
                l = h;
11746
                h = 0;
11747
            }
11748
        }
11749
        z1->dp[64] += l;
11750
        l = 0;
11751
        if (cb) {
11752
            h = 0;
11753
            for (i = 0; i < 32; i++) {
11754
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
11755
                SP_ASM_ADDC(l, h, a1->dp[i]);
11756
                z1->dp[i + 32] = l;
11757
                l = h;
11758
                h = 0;
11759
            }
11760
        }
11761
        z1->dp[64] += l;
11762
        /* z1 = z1 - z0 - z1 */
11763
        l = 0;
11764
        h = 0;
11765
        for (i = 0; i < 64; i++) {
11766
            l += z1->dp[i];
11767
            SP_ASM_SUBB(l, h, z0->dp[i]);
11768
            SP_ASM_SUBB(l, h, z2->dp[i]);
11769
            z1->dp[i] = l;
11770
            l = h;
11771
            h = 0;
11772
        }
11773
        z1->dp[i] += l;
11774
        /* r += z1 << 16 */
11775
        l = 0;
11776
        h = 0;
11777
        for (i = 0; i < 32; i++) {
11778
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
11779
            SP_ASM_ADDC(l, h, z1->dp[i]);
11780
            r->dp[i + 32] = l;
11781
            l = h;
11782
            h = 0;
11783
        }
11784
        for (; i < 65; i++) {
11785
            SP_ASM_ADDC(l, h, z1->dp[i]);
11786
            r->dp[i + 32] = l;
11787
            l = h;
11788
            h = 0;
11789
        }
11790
        /* r += z2 << 64  */
11791
        l = 0;
11792
        h = 0;
11793
        for (i = 0; i < 33; i++) {
11794
            SP_ASM_ADDC(l, h, r->dp[i + 64]);
11795
            SP_ASM_ADDC(l, h, z2->dp[i]);
11796
            r->dp[i + 64] = l;
11797
            l = h;
11798
            h = 0;
11799
        }
11800
        for (; i < 64; i++) {
11801
            SP_ASM_ADDC(l, h, z2->dp[i]);
11802
            r->dp[i + 64] = l;
11803
            l = h;
11804
            h = 0;
11805
        }
11806
        r->used = 128;
11807
        sp_clamp(r);
11808
    }
11809
11810
    FREE_SP_INT_ARRAY(z, NULL);
11811
    FREE_SP_INT_ARRAY(t, NULL);
11812
    return err;
11813
}
11814
    #endif /* SP_INT_DIGITS >= 128 */
11815
11816
    #if SP_INT_DIGITS >= 192
11817
/* Multiply a by b and store in r: r = a * b
11818
 *
11819
 * Karatsuba implementation.
11820
 *
11821
 * @param  [in]   a  SP integer to multiply.
11822
 * @param  [in]   b  SP integer to multiply.
11823
 * @param  [out]  r  SP integer result.
11824
 *
11825
 * @return  MP_OKAY on success.
11826
 * @return  MP_MEM when dynamic memory allocation fails.
11827
 */
11828
static int _sp_mul_96(const sp_int* a, const sp_int* b, sp_int* r)
11829
{
11830
    int err = MP_OKAY;
11831
    unsigned int i;
11832
    sp_int_digit l;
11833
    sp_int_digit h;
11834
    sp_int* a1;
11835
    sp_int* b1;
11836
    sp_int* z0;
11837
    sp_int* z1;
11838
    sp_int* z2;
11839
    sp_int_digit ca;
11840
    sp_int_digit cb;
11841
    DECL_SP_INT_ARRAY(t, 48, 2);
11842
    DECL_SP_INT_ARRAY(z, 97, 2);
11843
11844
    ALLOC_SP_INT_ARRAY(t, 48, 2, err, NULL);
11845
    ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
11846
    if (err == MP_OKAY) {
11847
        a1 = t[0];
11848
        b1 = t[1];
11849
        z1 = z[0];
11850
        z2 = z[1];
11851
        z0 = r;
11852
11853
        XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
11854
        a1->used = 48;
11855
        XMEMCPY(b1->dp, &b->dp[48], sizeof(sp_int_digit) * 48);
11856
        b1->used = 48;
11857
11858
        /* z2 = a1 * b1 */
11859
        err = _sp_mul_48(a1, b1, z2);
11860
    }
11861
    if (err == MP_OKAY) {
11862
        l = a1->dp[0];
11863
        h = 0;
11864
        SP_ASM_ADDC(l, h, a->dp[0]);
11865
        a1->dp[0] = l;
11866
        l = h;
11867
        h = 0;
11868
        for (i = 1; i < 48; i++) {
11869
            SP_ASM_ADDC(l, h, a1->dp[i]);
11870
            SP_ASM_ADDC(l, h, a->dp[i]);
11871
            a1->dp[i] = l;
11872
            l = h;
11873
            h = 0;
11874
        }
11875
        ca = l;
11876
        /* b01 = b0 + b1 */
11877
        l = b1->dp[0];
11878
        h = 0;
11879
        SP_ASM_ADDC(l, h, b->dp[0]);
11880
        b1->dp[0] = l;
11881
        l = h;
11882
        h = 0;
11883
        for (i = 1; i < 48; i++) {
11884
            SP_ASM_ADDC(l, h, b1->dp[i]);
11885
            SP_ASM_ADDC(l, h, b->dp[i]);
11886
            b1->dp[i] = l;
11887
            l = h;
11888
            h = 0;
11889
        }
11890
        cb = l;
11891
11892
        /* z0 = a0 * b0 */
11893
        err = _sp_mul_48(a, b, z0);
11894
    }
11895
    if (err == MP_OKAY) {
11896
        /* z1 = (a0 + a1) * (b0 + b1) */
11897
        err = _sp_mul_48(a1, b1, z1);
11898
    }
11899
    if (err == MP_OKAY) {
11900
        /* r = (z2 << 96) + ((z1 - z0 - z2) << 48) + z0 */
11901
        /* r = z0 */
11902
        /* r += (z1 - z0 - z2) << 48 */
11903
        z1->dp[96] = ca & cb;
11904
        l = 0;
11905
        if (ca) {
11906
            h = 0;
11907
            for (i = 0; i < 48; i++) {
11908
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
11909
                SP_ASM_ADDC(l, h, b1->dp[i]);
11910
                z1->dp[i + 48] = l;
11911
                l = h;
11912
                h = 0;
11913
            }
11914
        }
11915
        z1->dp[96] += l;
11916
        l = 0;
11917
        if (cb) {
11918
            h = 0;
11919
            for (i = 0; i < 48; i++) {
11920
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
11921
                SP_ASM_ADDC(l, h, a1->dp[i]);
11922
                z1->dp[i + 48] = l;
11923
                l = h;
11924
                h = 0;
11925
            }
11926
        }
11927
        z1->dp[96] += l;
11928
        /* z1 = z1 - z0 - z2 */
11929
        l = 0;
11930
        h = 0;
11931
        for (i = 0; i < 96; i++) {
11932
            l += z1->dp[i];
11933
            SP_ASM_SUBB(l, h, z0->dp[i]);
11934
            SP_ASM_SUBB(l, h, z2->dp[i]);
11935
            z1->dp[i] = l;
11936
            l = h;
11937
            h = 0;
11938
        }
11939
        z1->dp[i] += l;
11940
        /* r += z1 << 48 */
11941
        l = 0;
11942
        h = 0;
11943
        for (i = 0; i < 48; i++) {
11944
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
11945
            SP_ASM_ADDC(l, h, z1->dp[i]);
11946
            r->dp[i + 48] = l;
11947
            l = h;
11948
            h = 0;
11949
        }
11950
        for (; i < 97; i++) {
11951
            SP_ASM_ADDC(l, h, z1->dp[i]);
11952
            r->dp[i + 48] = l;
11953
            l = h;
11954
            h = 0;
11955
        }
11956
        /* r += z2 << 96  */
11957
        l = 0;
11958
        h = 0;
11959
        for (i = 0; i < 49; i++) {
11960
            SP_ASM_ADDC(l, h, r->dp[i + 96]);
11961
            SP_ASM_ADDC(l, h, z2->dp[i]);
11962
            r->dp[i + 96] = l;
11963
            l = h;
11964
            h = 0;
11965
        }
11966
        for (; i < 96; i++) {
11967
            SP_ASM_ADDC(l, h, z2->dp[i]);
11968
            r->dp[i + 96] = l;
11969
            l = h;
11970
            h = 0;
11971
        }
11972
        r->used = 192;
11973
        sp_clamp(r);
11974
    }
11975
11976
    FREE_SP_INT_ARRAY(z, NULL);
11977
    FREE_SP_INT_ARRAY(t, NULL);
11978
    return err;
11979
}
11980
    #endif /* SP_INT_DIGITS >= 192 */
11981
11982
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
11983
#endif /* !WOLFSSL_SP_SMALL */
11984
11985
/* Multiply a by b and store in r: r = a * b
11986
 *
11987
 * @param  [in]   a  SP integer to multiply.
11988
 * @param  [in]   b  SP integer to multiply.
11989
 * @param  [out]  r  SP integer result.
11990
 *
11991
 * @return  MP_OKAY on success.
11992
 * @return  MP_VAL when a, b or r is NULL; or the result will be too big for fixed
11993
 *          data length.
11994
 * @return  MP_MEM when dynamic memory allocation fails.
11995
 */
11996
int sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
11997
12.1M
{
11998
12.1M
    int err = MP_OKAY;
11999
12.1M
#ifdef WOLFSSL_SP_INT_NEGATIVE
12000
12.1M
    sp_uint8 sign = MP_ZPOS;
12001
12.1M
#endif
12002
12003
12.1M
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
12004
0
        err = MP_VAL;
12005
0
    }
12006
12007
    /* Need extra digit during calculation. */
12008
    /* NOLINTBEGIN(clang-analyzer-core.UndefinedBinaryOperatorResult) */
12009
    /* clang-tidy falsely believes that r->size was corrupted by the _sp_copy()
12010
     * to "Copy base into working variable" in _sp_exptmod_ex().
12011
     */
12012
12.1M
    if ((err == MP_OKAY) && (a->used + b->used > r->size)) {
12013
14
        err = MP_VAL;
12014
14
    }
12015
    /* NOLINTEND(clang-analyzer-core.UndefinedBinaryOperatorResult) */
12016
12017
#if 0
12018
    if (err == MP_OKAY) {
12019
        sp_print(a, "a");
12020
        sp_print(b, "b");
12021
    }
12022
#endif
12023
12024
12.1M
    if (err == MP_OKAY) {
12025
12.1M
    #ifdef WOLFSSL_SP_INT_NEGATIVE
12026
12.1M
        sign = a->sign ^ b->sign;
12027
12.1M
    #endif
12028
12029
12.1M
        if ((a->used == 0) || (b->used == 0)) {
12030
67.0k
            _sp_zero(r);
12031
67.0k
        }
12032
12.0M
        else
12033
12.0M
#ifndef WOLFSSL_SP_SMALL
12034
12.0M
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
12035
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
12036
        if ((a->used == 4) && (b->used == 4)) {
12037
            err = _sp_mul_4(a, b, r);
12038
        }
12039
        else
12040
#endif /* SP_WORD_SIZE == 64 */
12041
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
12042
#ifdef SQR_MUL_ASM
12043
        if ((a->used == 6) && (b->used == 6)) {
12044
            err = _sp_mul_6(a, b, r);
12045
        }
12046
        else
12047
#endif /* SQR_MUL_ASM */
12048
#endif /* SP_WORD_SIZE == 64 */
12049
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
12050
#ifdef SQR_MUL_ASM
12051
        if ((a->used == 8) && (b->used == 8)) {
12052
            err = _sp_mul_8(a, b, r);
12053
        }
12054
        else
12055
#endif /* SQR_MUL_ASM */
12056
#endif /* SP_WORD_SIZE == 32 */
12057
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
12058
#ifdef SQR_MUL_ASM
12059
        if ((a->used == 12) && (b->used == 12)) {
12060
            err = _sp_mul_12(a, b, r);
12061
        }
12062
        else
12063
#endif /* SQR_MUL_ASM */
12064
#endif /* SP_WORD_SIZE == 32 */
12065
12.0M
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
12066
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
12067
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
12068
    (SP_WORD_SIZE == 64)))
12069
    #if SP_INT_DIGITS >= 32
12070
        if ((a->used == 16) && (b->used == 16)) {
12071
            err = _sp_mul_16(a, b, r);
12072
        }
12073
        else
12074
    #endif /* SP_INT_DIGITS >= 32 */
12075
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
12076
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
12077
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
12078
    #if SP_INT_DIGITS >= 48
12079
        if ((a->used == 24) && (b->used == 24)) {
12080
            err = _sp_mul_24(a, b, r);
12081
        }
12082
        else
12083
    #endif /* SP_INT_DIGITS >= 48 */
12084
    #if SP_INT_DIGITS >= 64
12085
        if ((a->used == 32) && (b->used == 32)) {
12086
            err = _sp_mul_32(a, b, r);
12087
        }
12088
        else
12089
    #endif /* SP_INT_DIGITS >= 64 */
12090
    #if SP_INT_DIGITS >= 96
12091
        if ((a->used == 48) && (b->used == 48)) {
12092
            err = _sp_mul_48(a, b, r);
12093
        }
12094
        else
12095
    #endif /* SP_INT_DIGITS >= 96 */
12096
    #if SP_INT_DIGITS >= 128
12097
        if ((a->used == 64) && (b->used == 64)) {
12098
            err = _sp_mul_64(a, b, r);
12099
        }
12100
        else
12101
    #endif /* SP_INT_DIGITS >= 128 */
12102
    #if SP_INT_DIGITS >= 192
12103
        if ((a->used == 96) && (b->used == 96)) {
12104
            err = _sp_mul_96(a, b, r);
12105
        }
12106
        else
12107
    #endif /* SP_INT_DIGITS >= 192 */
12108
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
12109
12.0M
#endif /* !WOLFSSL_SP_SMALL */
12110
12111
#ifdef SQR_MUL_ASM
12112
        if (a->used == b->used) {
12113
            err = _sp_mul_nxn(a, b, r);
12114
        }
12115
        else
12116
#endif
12117
12.0M
        {
12118
12.0M
            err = _sp_mul(a, b, r);
12119
12.0M
        }
12120
12.1M
    }
12121
12122
12.1M
#ifdef WOLFSSL_SP_INT_NEGATIVE
12123
12.1M
    if (err == MP_OKAY) {
12124
12.1M
        r->sign = (r->used == 0) ? MP_ZPOS : sign;
12125
12.1M
    }
12126
12.1M
#endif
12127
12128
#if 0
12129
    if (err == MP_OKAY) {
12130
        sp_print(r, "rmul");
12131
    }
12132
#endif
12133
12134
12.1M
    return err;
12135
12.1M
}
12136
/* END SP_MUL implementations. */
12137
12138
#endif
12139
12140
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
12141
    defined(WOLFCRYPT_HAVE_ECCSI) || \
12142
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || defined(OPENSSL_ALL)
12143
/* Multiply a by b mod m and store in r: r = (a * b) mod m
12144
 *
12145
 * @param  [in]   a  SP integer to multiply.
12146
 * @param  [in]   b  SP integer to multiply.
12147
 * @param  [in]   m  SP integer that is the modulus.
12148
 * @param  [out]  r  SP integer result.
12149
 *
12150
 * @return  MP_OKAY on success.
12151
 * @return  MP_MEM when dynamic memory allocation fails.
12152
 */
12153
static int _sp_mulmod_tmp(const sp_int* a, const sp_int* b, const sp_int* m,
12154
    sp_int* r)
12155
81
{
12156
81
    int err = MP_OKAY;
12157
    /* Create temporary for multiplication result. */
12158
81
    DECL_SP_INT(t, a->used + b->used);
12159
12160
81
    ALLOC_SP_INT(t, a->used + b->used, err, NULL);
12161
81
    if (err == MP_OKAY) {
12162
49
        err = sp_init_size(t, (sp_size_t)(a->used + b->used));
12163
49
    }
12164
12165
    /* Multiply and reduce. */
12166
81
    if (err == MP_OKAY) {
12167
42
        err = sp_mul(a, b, t);
12168
42
    }
12169
81
    if (err == MP_OKAY) {
12170
37
        err = sp_mod(t, m, r);
12171
37
    }
12172
12173
    /* Dispose of an allocated SP int. */
12174
81
    FREE_SP_INT(t, NULL);
12175
12176
81
    return err;
12177
81
}
12178
12179
/* Multiply a by b mod m and store in r: r = (a * b) mod m
12180
 *
12181
 * @param  [in]   a  SP integer to multiply.
12182
 * @param  [in]   b  SP integer to multiply.
12183
 * @param  [in]   m  SP integer that is the modulus.
12184
 * @param  [out]  r  SP integer result.
12185
 *
12186
 * @return  MP_OKAY on success.
12187
 * @return  MP_MEM when dynamic memory allocation fails.
12188
 */
12189
static int _sp_mulmod(const sp_int* a, const sp_int* b, const sp_int* m,
12190
    sp_int* r)
12191
7.09M
{
12192
7.09M
    int err = MP_OKAY;
12193
12194
    /* Use r as intermediate result if not same as pointer m which is needed
12195
     * after first intermediate result.
12196
     */
12197
7.09M
    if (r != m) {
12198
        /* Multiply and reduce. */
12199
7.09M
        err = sp_mul(a, b, r);
12200
7.09M
        if (err == MP_OKAY) {
12201
7.09M
            err = sp_mod(r, m, r);
12202
7.09M
        }
12203
7.09M
    }
12204
81
    else {
12205
        /* Do operation using temporary. */
12206
81
        err = _sp_mulmod_tmp(a, b, m, r);
12207
81
    }
12208
12209
7.09M
    return err;
12210
7.09M
}
12211
12212
/* Multiply a by b mod m and store in r: r = (a * b) mod m
12213
 *
12214
 * @param  [in]   a  SP integer to multiply.
12215
 * @param  [in]   b  SP integer to multiply.
12216
 * @param  [in]   m  SP integer that is the modulus.
12217
 * @param  [out]  r  SP integer result.
12218
 *
12219
 * @return  MP_OKAY on success.
12220
 * @return  MP_VAL when a, b, m or r is NULL; m is 0; or a * b is too big for
12221
 *          fixed data length.
12222
 * @return  MP_MEM when dynamic memory allocation fails.
12223
 */
12224
int sp_mulmod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
12225
1.04M
{
12226
1.04M
    int err = MP_OKAY;
12227
12228
    /* Validate parameters. */
12229
1.04M
    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
12230
0
        err = MP_VAL;
12231
0
    }
12232
    /* Ensure result SP int is big enough for intermediates. */
12233
1.04M
    if ((err == MP_OKAY) && (r != m) && (a->used + b->used > r->size)) {
12234
52
        err = MP_VAL;
12235
52
    }
12236
12237
#if 0
12238
    if (err == 0) {
12239
        sp_print(a, "a");
12240
        sp_print(b, "b");
12241
        sp_print(m, "m");
12242
    }
12243
#endif
12244
12245
1.04M
    if (err == MP_OKAY) {
12246
1.04M
        err = _sp_mulmod(a, b, m, r);
12247
1.04M
    }
12248
12249
#if 0
12250
    if (err == 0) {
12251
        sp_print(r, "rmm");
12252
    }
12253
#endif
12254
12255
1.04M
    return err;
12256
1.04M
}
12257
#endif
12258
12259
#ifdef WOLFSSL_SP_INVMOD
12260
/* Calculates the multiplicative inverse in the field. r*a = x*m + 1
12261
 * Right-shift Algorithm. NOT constant time.
12262
 *
12263
 * Algorithm:
12264
 *   1. u = m, v = a, b = 0, c = 1
12265
 *   2. While v != 1 and u != 0
12266
 *     2.1. If u even
12267
 *       2.1.1. u /= 2
12268
 *       2.1.2. b = (b / 2) mod m
12269
 *     2.2. Else if v even
12270
 *       2.2.1. v /= 2
12271
 *       2.2.2. c = (c / 2) mod m
12272
 *     2.3. Else if u >= v
12273
 *       2.3.1. u -= v
12274
 *       2.3.2. b = (b - c) mod m
12275
 *     2.4. Else (v > u)
12276
 *       2.4.1. v -= u
12277
 *       2.4.2. c = (c - b) mod m
12278
 *  3. NO_INVERSE if u == 0
12279
 *
12280
 * @param  [in]   a  SP integer to find inverse of.
12281
 * @param  [in]   m  SP integer that is the modulus.
12282
 * @param  [in]   u  SP integer to use in calculation.
12283
 * @param  [in]   v  SP integer to use in calculation.
12284
 * @param  [in]   b  SP integer to use in calculation
12285
 * @param  [out]  c  SP integer that is the inverse.
12286
 *
12287
 * @return  MP_OKAY on success.
12288
 * @return  MP_VAL when no inverse.
12289
 */
12290
static int _sp_invmod_bin(const sp_int* a, const sp_int* m, sp_int* u,
12291
    sp_int* v, sp_int* b, sp_int* c)
12292
12.5k
{
12293
12.5k
    int err = MP_OKAY;
12294
12295
    /* 1. u = m, v = a, b = 0, c = 1 */
12296
12.5k
    _sp_copy(m, u);
12297
12.5k
    if (a != v) {
12298
12.1k
        _sp_copy(a, v);
12299
12.1k
    }
12300
12.5k
    _sp_zero(b);
12301
12.5k
    _sp_set(c, 1);
12302
12303
    /* 2. While v != 1 and u != 0 */
12304
6.25M
    while (!sp_isone(v) && !sp_iszero(u)) {
12305
        /* 2.1. If u even */
12306
6.24M
        if ((u->dp[0] & 1) == 0) {
12307
            /* 2.1.1. u /= 2 */
12308
2.09M
            _sp_div_2(u, u);
12309
            /* 2.1.2. b = (b / 2) mod m */
12310
2.09M
            if (sp_isodd(b)) {
12311
1.13M
                _sp_add_off(b, m, b, 0);
12312
1.13M
            }
12313
2.09M
            _sp_div_2(b, b);
12314
2.09M
        }
12315
        /* 2.2. Else if v even */
12316
4.15M
        else if ((v->dp[0] & 1) == 0) {
12317
            /* 2.2.1. v /= 2 */
12318
2.13M
            _sp_div_2(v, v);
12319
            /* 2.2.2. c = (c / 2) mod m */
12320
2.13M
            if (sp_isodd(c)) {
12321
871k
                _sp_add_off(c, m, c, 0);
12322
871k
            }
12323
2.13M
            _sp_div_2(c, c);
12324
2.13M
        }
12325
        /* 2.3. Else if u >= v */
12326
2.01M
        else if (_sp_cmp_abs(u, v) != MP_LT) {
12327
            /* 2.3.1. u -= v */
12328
1.02M
            _sp_sub_off(u, v, u, 0);
12329
            /* 2.3.2. b = (b - c) mod m */
12330
1.02M
            if (_sp_cmp_abs(b, c) == MP_LT) {
12331
430k
                _sp_add_off(b, m, b, 0);
12332
430k
            }
12333
1.02M
            _sp_sub_off(b, c, b, 0);
12334
1.02M
        }
12335
        /* 2.4. Else (v > u) */
12336
993k
        else {
12337
            /* 2.4.1. v -= u */
12338
993k
            _sp_sub_off(v, u, v, 0);
12339
            /* 2.4.2. c = (c - b) mod m */
12340
993k
            if (_sp_cmp_abs(c, b) == MP_LT) {
12341
550k
                _sp_add_off(c, m, c, 0);
12342
550k
            }
12343
993k
            _sp_sub_off(c, b, c, 0);
12344
993k
        }
12345
6.24M
    }
12346
    /* 3. NO_INVERSE if u == 0 */
12347
12.5k
    if (sp_iszero(u)) {
12348
142
        err = MP_VAL;
12349
142
    }
12350
12351
12.5k
    return err;
12352
12.5k
}
12353
12354
#if !defined(WOLFSSL_SP_LOW_MEM) && !defined(WOLFSSL_SP_SMALL) && \
12355
    (!defined(NO_RSA) || !defined(NO_DH))
12356
/* Calculates the multiplicative inverse in the field. r*a = x*m + 1
12357
 * Extended Euclidean Algorithm. NOT constant time.
12358
 *
12359
 * Creates two new SP ints.
12360
 *
12361
 * Algorithm:
12362
 *  1. x = m, y = a, b = 1, c = 0
12363
 *  2. while x > 1
12364
 *   2.1. d = x / y, r = x mod y
12365
 *   2.2. c -= d * b
12366
 *   2.3. x = y, y = r
12367
 *   2.4. s = b, b = c, c = s
12368
 *  3. If y != 0 then NO_INVERSE
12369
 *  4. If c < 0 then c += m
12370
 *  5. inv = c
12371
 *
12372
 * @param  [in]   a    SP integer to find inverse of.
12373
 * @param  [in]   m    SP integer that is the modulus.
12374
 * @param  [in]   x    SP integer to use in calculation.
12375
 * @param  [in]   y    SP integer to use in calculation.
12376
 * @param  [in]   b    SP integer to use in calculation
12377
 * @param  [in]   c    SP integer to use in calculation
12378
 * @param  [out]  inv  SP integer that is the inverse.
12379
 *
12380
 * @return  MP_OKAY on success.
12381
 * @return  MP_VAL when no inverse.
12382
 * @return  MP_MEM when dynamic memory allocation fails.
12383
 */
12384
static int _sp_invmod_div(const sp_int* a, const sp_int* m, sp_int* x,
12385
    sp_int* y, sp_int* b, sp_int* c, sp_int* inv)
12386
9.22k
{
12387
9.22k
    int err = MP_OKAY;
12388
9.22k
    sp_int* s;
12389
#ifndef WOLFSSL_SP_INT_NEGATIVE
12390
    int bneg = 0;
12391
    int cneg = 0;
12392
    int neg;
12393
#endif
12394
9.22k
    DECL_SP_INT(d, m->used + 1);
12395
12396
9.22k
    ALLOC_SP_INT(d, m->used + 1, err, NULL);
12397
9.22k
    if (err == MP_OKAY) {
12398
9.21k
        err = sp_init_size(d, (sp_size_t)(m->used + 1U));
12399
9.21k
    }
12400
12401
9.22k
    if (err == MP_OKAY) {
12402
        /* 1. x = m, y = a, b = 1, c = 0 */
12403
9.21k
        if (a != y) {
12404
9.17k
            _sp_copy(a, y);
12405
9.17k
        }
12406
9.21k
        _sp_copy(m, x);
12407
9.21k
        _sp_set(b, 1);
12408
9.21k
        _sp_zero(c);
12409
9.21k
    }
12410
9.22k
#ifdef WOLFSSL_SP_INT_NEGATIVE
12411
    /* 2. while x > 1 */
12412
10.1M
    while ((err == MP_OKAY) && (!sp_isone(x)) && (!sp_iszero(x))) {
12413
        /* 2.1. d = x / y, r = x mod y */
12414
10.1M
        err = sp_div(x, y, d, x);
12415
10.1M
        if (err == MP_OKAY) {
12416
            /* 2.2. c -= d * b */
12417
10.1M
            if (sp_isone(d)) {
12418
                /* c -= 1 * b */
12419
4.19M
                err = sp_sub(c, b, c);
12420
4.19M
            }
12421
5.91M
            else {
12422
                /* d *= b */
12423
5.91M
                err = sp_mul(d, b, d);
12424
                /* c -= d */
12425
5.91M
                if (err == MP_OKAY) {
12426
5.91M
                    err = sp_sub(c, d, c);
12427
5.91M
                }
12428
5.91M
            }
12429
            /* 2.3. x = y, y = r */
12430
10.1M
            s = y; y = x; x = s;
12431
            /* 2.4. s = b, b = c, c = s */
12432
10.1M
            s = b; b = c; c = s;
12433
10.1M
        }
12434
10.1M
    }
12435
    /* 3. If y != 0 then NO_INVERSE */
12436
9.22k
    if ((err == MP_OKAY) && (!sp_iszero(y))) {
12437
0
        err = MP_VAL;
12438
0
    }
12439
    /* 4. If c < 0 then c += m */
12440
9.22k
    if ((err == MP_OKAY) && sp_isneg(c)) {
12441
4.26k
        err = sp_add(c, m, c);
12442
4.26k
    }
12443
9.22k
    if (err == MP_OKAY) {
12444
        /* 5. inv = c */
12445
8.53k
        err = sp_copy(c, inv);
12446
8.53k
    }
12447
#else
12448
    /* 2. while x > 1 */
12449
    while ((err == MP_OKAY) && (!sp_isone(x)) && (!sp_iszero(x))) {
12450
        /* 2.1. d = x / y, r = x mod y */
12451
        err = sp_div(x, y, d, x);
12452
        if (err == MP_OKAY) {
12453
            if (sp_isone(d)) {
12454
                /* c -= 1 * b */
12455
                if ((bneg ^ cneg) == 1) {
12456
                    /* c -= -b or -c -= b, therefore add. */
12457
                    _sp_add_off(c, b, c, 0);
12458
                }
12459
                else if (_sp_cmp_abs(c, b) == MP_LT) {
12460
                    /* |c| < |b| and same sign, reverse subtract and negate. */
12461
                    _sp_sub_off(b, c, c, 0);
12462
                    cneg = !cneg;
12463
                }
12464
                else {
12465
                    /* |c| >= |b| */
12466
                    _sp_sub_off(c, b, c, 0);
12467
                }
12468
            }
12469
            else {
12470
                /* d *= b */
12471
                err = sp_mul(d, b, d);
12472
                /* c -= d */
12473
                if (err == MP_OKAY) {
12474
                    if ((bneg ^ cneg) == 1) {
12475
                        /* c -= -d or -c -= d, therefore add. */
12476
                        _sp_add_off(c, d, c, 0);
12477
                    }
12478
                    else if (_sp_cmp_abs(c, d) == MP_LT) {
12479
                        /* |c| < |d| and same sign, reverse subtract and negate.
12480
                         */
12481
                        _sp_sub_off(d, c, c, 0);
12482
                        cneg = !cneg;
12483
                    }
12484
                    else {
12485
                        _sp_sub_off(c, d, c, 0);
12486
                    }
12487
                }
12488
            }
12489
            /* 2.3. x = y, y = r */
12490
            s = y; y = x; x = s;
12491
            /* 2.4. s = b, b = c, c = s */
12492
            s = b; b = c; c = s;
12493
            neg = bneg; bneg = cneg; cneg = neg;
12494
        }
12495
    }
12496
    /* 3. If y != 0 then NO_INVERSE */
12497
    if ((err == MP_OKAY) && (!sp_iszero(y))) {
12498
        err = MP_VAL;
12499
    }
12500
    /* 4. If c < 0 then c += m */
12501
    if ((err == MP_OKAY) && cneg) {
12502
        /* c = m - |c| */
12503
        _sp_sub_off(m, c, c, 0);
12504
    }
12505
    if (err == MP_OKAY) {
12506
        /* 5. inv = c */
12507
        err = sp_copy(c, inv);
12508
    }
12509
#endif
12510
12511
9.22k
    FREE_SP_INT(d, NULL);
12512
9.22k
    return err;
12513
9.22k
}
12514
#endif
12515
12516
/* Calculates the multiplicative inverse in the field.
12517
 * Right-shift Algorithm or Extended Euclidean Algorithm. NOT constant time.
12518
 *
12519
 * r*a = x*m + 1
12520
 *
12521
 * @param  [in]   a  SP integer to find inverse of.
12522
 * @param  [in]   m  SP integer that is the modulus.
12523
 * @param  [out]  r  SP integer to hold result. r cannot be m.
12524
 *
12525
 * @return  MP_OKAY on success.
12526
 * @return  MP_VAL when m is even and a divides m evenly.
12527
 * @return  MP_MEM when dynamic memory allocation fails.
12528
 */
12529
static int _sp_invmod(const sp_int* a, const sp_int* m, sp_int* r)
12530
28.1k
{
12531
28.1k
    int err = MP_OKAY;
12532
28.1k
    sp_int* u = NULL;
12533
28.1k
    sp_int* v = NULL;
12534
28.1k
    sp_int* b = NULL;
12535
28.1k
    DECL_SP_INT_ARRAY(t, m->used + 1, 3);
12536
28.1k
    DECL_SP_INT(c, 2 * m->used + 1);
12537
12538
    /* Allocate SP ints:
12539
     *  - x3 one word larger than modulus
12540
     *  - x1 one word longer than twice modulus used
12541
     */
12542
28.1k
    ALLOC_SP_INT_ARRAY(t, m->used + 1U, 3, err, NULL);
12543
28.1k
    ALLOC_SP_INT(c, 2 * m->used + 1, err, NULL);
12544
28.1k
    if (err == MP_OKAY) {
12545
28.0k
        u = t[0];
12546
28.0k
        v = t[1];
12547
28.0k
        b = t[2];
12548
        /* c allocated separately and larger for even mod case. */
12549
28.0k
    }
12550
12551
    /* Initialize intermediate values with minimal sizes. */
12552
28.1k
    if (err == MP_OKAY) {
12553
28.0k
        err = sp_init_size(u, (sp_size_t)(m->used + 1U));
12554
28.0k
    }
12555
28.1k
    if (err == MP_OKAY) {
12556
28.0k
        err = sp_init_size(v, (sp_size_t)(m->used + 1U));
12557
28.0k
    }
12558
28.1k
    if (err == MP_OKAY) {
12559
28.0k
        err = sp_init_size(b, (sp_size_t)(m->used + 1U));
12560
28.0k
    }
12561
28.1k
    if (err == MP_OKAY) {
12562
28.0k
        err = sp_init_size(c, (sp_size_t)(2U * m->used + 1U));
12563
28.0k
    }
12564
12565
28.1k
    if (err == MP_OKAY) {
12566
28.0k
        const sp_int* mm = m;
12567
28.0k
        const sp_int* ma = a;
12568
28.0k
        int evenMod = 0;
12569
12570
28.0k
        if (sp_iseven(m)) {
12571
            /* a^-1 mod m = m + ((1 - m*(m^-1 % a)) / a) */
12572
1.17k
            mm = a;
12573
1.17k
            ma = v;
12574
1.17k
            _sp_copy(a, u);
12575
1.17k
            err = sp_mod(m, a, v);
12576
            /* v == 0 when a divides m evenly - no inverse.  */
12577
1.17k
            if ((err == MP_OKAY) && sp_iszero(v)) {
12578
52
                err = MP_VAL;
12579
52
            }
12580
1.17k
            evenMod = 1;
12581
1.17k
        }
12582
12583
28.0k
        if (err == MP_OKAY) {
12584
            /* Calculate inverse. */
12585
28.0k
        #if !defined(WOLFSSL_SP_LOW_MEM) && !defined(WOLFSSL_SP_SMALL) && \
12586
28.0k
            (!defined(NO_RSA) || !defined(NO_DH))
12587
28.0k
            if (sp_count_bits(mm) >= 1024) {
12588
9.51k
                err = _sp_invmod_div(ma, mm, u, v, b, c, c);
12589
9.51k
            }
12590
18.5k
            else
12591
18.5k
        #endif
12592
18.5k
            {
12593
18.5k
                err = _sp_invmod_bin(ma, mm, u, v, b, c);
12594
18.5k
            }
12595
28.0k
        }
12596
12597
        /* Fixup for even modulus. */
12598
28.0k
        if ((err == MP_OKAY) && evenMod) {
12599
            /* Finish operation.
12600
             *    a^-1 mod m = m + ((1 - m*c) / a)
12601
             * => a^-1 mod m = m - ((m*c - 1) / a)
12602
             */
12603
943
            err = sp_mul(c, m, c);
12604
943
            if (err == MP_OKAY) {
12605
939
                _sp_sub_d(c, 1, c);
12606
939
                err = sp_div(c, a, c, NULL);
12607
939
            }
12608
943
            if (err == MP_OKAY) {
12609
937
                err = sp_sub(m, c, r);
12610
937
            }
12611
943
        }
12612
27.1k
        else if (err == MP_OKAY) {
12613
26.0k
            _sp_copy(c, r);
12614
26.0k
        }
12615
28.0k
    }
12616
12617
28.1k
    FREE_SP_INT(c, NULL);
12618
28.1k
    FREE_SP_INT_ARRAY(t, NULL);
12619
28.1k
    return err;
12620
28.1k
}
12621
12622
/* Calculates the multiplicative inverse in the field.
12623
 * Right-shift Algorithm or Extended Euclidean Algorithm. NOT constant time.
12624
 *
12625
 * r*a = x*m + 1
12626
 *
12627
 * @param  [in]   a  SP integer to find inverse of.
12628
 * @param  [in]   m  SP integer that is the modulus.
12629
 * @param  [out]  r  SP integer to hold result. r cannot be m.
12630
 *
12631
 * @return  MP_OKAY on success.
12632
 * @return  MP_VAL when a, m or r is NULL; a or m is zero; a and m are even or
12633
 *          m is negative.
12634
 * @return  MP_MEM when dynamic memory allocation fails.
12635
 */
12636
int sp_invmod(const sp_int* a, const sp_int* m, sp_int* r)
12637
22.6k
{
12638
22.6k
    int err = MP_OKAY;
12639
12640
    /* Validate parameters. */
12641
22.6k
    if ((a == NULL) || (m == NULL) || (r == NULL) || (r == m)) {
12642
6
        err = MP_VAL;
12643
6
    }
12644
22.6k
    if ((err == MP_OKAY) && (m->used * 2 > r->size)) {
12645
20
        err = MP_VAL;
12646
20
    }
12647
12648
22.6k
#ifdef WOLFSSL_SP_INT_NEGATIVE
12649
    /* Don't support negative modulus. */
12650
22.6k
    if ((err == MP_OKAY) && (m->sign == MP_NEG)) {
12651
24
        err = MP_VAL;
12652
24
    }
12653
22.6k
#endif
12654
12655
22.6k
    if (err == MP_OKAY) {
12656
        /* Ensure number is less than modulus. */
12657
22.5k
        if (_sp_cmp_abs(a, m) != MP_LT) {
12658
2.86k
            err = sp_mod(a, m, r);
12659
2.86k
            a = r;
12660
2.86k
        }
12661
22.5k
    }
12662
12663
22.6k
#ifdef WOLFSSL_SP_INT_NEGATIVE
12664
22.6k
    if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
12665
        /* Make 'a' positive */
12666
143
        err = sp_add(m, a, r);
12667
143
        a = r;
12668
143
    }
12669
22.6k
#endif
12670
12671
    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1)  */
12672
22.6k
    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m))) {
12673
36
        err = MP_VAL;
12674
36
    }
12675
    /* r*2*x != n*2*y + 1 for integer x,y */
12676
22.6k
    if ((err == MP_OKAY) && sp_iseven(a) && sp_iseven(m)) {
12677
31
        err = MP_VAL;
12678
31
    }
12679
    /* 1*1 = 0*m + 1  */
12680
22.6k
    if ((err == MP_OKAY) && sp_isone(a)) {
12681
605
        _sp_set(r, 1);
12682
605
    }
12683
22.0k
    else if (err == MP_OKAY) {
12684
21.8k
        err = _sp_invmod(a, m, r);
12685
21.8k
    }
12686
12687
22.6k
    return err;
12688
22.6k
}
12689
#endif /* WOLFSSL_SP_INVMOD */
12690
12691
#ifdef WOLFSSL_SP_INVMOD_MONT_CT
12692
12693
/* Number of entries to pre-compute.
12694
 * Many pre-defined primes have multiple of 8 consecutive 1s.
12695
 * P-256 modulus - 2 => 32x1, 31x0, 1x1, 96x0, 94x1, 1x0, 1x1.
12696
 */
12697
4.88M
#define CT_INV_MOD_PRE_CNT      8
12698
12699
/* Calculates the multiplicative inverse in the field - constant time.
12700
 *
12701
 * Modulus (m) must be a prime and greater than 2.
12702
 * For prime m, inv = a ^ (m-2) mod m as 1 = a ^ (m-1) mod m.
12703
 *
12704
 * Algorithm:
12705
 *  pre = pre-computed values, m = modulus, a = value to find inverse of,
12706
 *  e = exponent
12707
 *  Pre-calc:
12708
 *   1. pre[0] = 2^0 * a mod m
12709
 *   2. For i in 2..CT_INV_MOD_PRE_CNT
12710
 *    2.1. pre[i-1] = ((pre[i-2] ^ 2) * a) mod m
12711
 *  Calc inverse:
12712
 *   1. e = m - 2
12713
 *   2. j = Count leading 1's up to CT_INV_MOD_PRE_CNT
12714
 *   3. t = pre[j-1]
12715
 *   4. s = 0
12716
 *   5. j = 0
12717
 *   6. For i index of next top bit..0
12718
 *    6.1. bit = e[i]
12719
 *    6.2. j += bit
12720
 *    6.3. s += 1
12721
 *    6.4. if j == CT_INV_MOD_PRE_CNT or (bit == 0 and j > 0)
12722
 *     6.4.1. s -= 1 - bit
12723
 *     6.4.2. For s downto 1
12724
 *      6.4.2.1. t = (t ^ 2) mod m
12725
 *     6.4.3. s = 1 - bit
12726
 *     6.4.4. t = (t * pre[j-1]) mod m
12727
 *     6.4.5. j = 0
12728
 *   7. For s downto 1
12729
 *    7.1. t = (t ^ 2) mod m
12730
 *   8. If j > 0 then r = (t * pre[j-1]) mod m
12731
 *   9. Else r = t
12732
 *
12733
 * @param  [in]   a   SP integer, Montgomery form, to find inverse of.
12734
 * @param  [in]   m   SP integer that is the modulus.
12735
 * @param  [out]  r   SP integer to hold result.
12736
 * @param  [in]   mp  SP integer digit that is the bottom digit of inv(-m).
12737
 *
12738
 * @return  MP_OKAY on success.
12739
 * @return  MP_MEM when dynamic memory allocation fails.
12740
 */
12741
static int _sp_invmod_mont_ct(const sp_int* a, const sp_int* m, sp_int* r,
12742
    sp_int_digit mp)
12743
17.5k
{
12744
17.5k
    int err = MP_OKAY;
12745
17.5k
    int i;
12746
17.5k
    int j = 0;
12747
17.5k
    int s = 0;
12748
17.5k
    sp_int* t = NULL;
12749
17.5k
    sp_int* e = NULL;
12750
17.5k
#ifndef WOLFSSL_SP_NO_MALLOC
12751
17.5k
    DECL_DYN_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2);
12752
#else
12753
    DECL_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2);
12754
#endif
12755
12756
17.5k
#ifndef WOLFSSL_SP_NO_MALLOC
12757
17.5k
    ALLOC_DYN_SP_INT_ARRAY(pre, m->used * 2U + 1U, CT_INV_MOD_PRE_CNT + 2, err,
12758
17.5k
        NULL);
12759
#else
12760
    ALLOC_SP_INT_ARRAY(pre, m->used * 2U + 1U, CT_INV_MOD_PRE_CNT + 2, err, NULL);
12761
#endif
12762
17.5k
    if (err == MP_OKAY) {
12763
17.4k
        t = pre[CT_INV_MOD_PRE_CNT + 0];
12764
17.4k
        e = pre[CT_INV_MOD_PRE_CNT + 1];
12765
        /* Space for sqr and mul result. */
12766
17.4k
        _sp_init_size(t, (sp_size_t)(m->used * 2 + 1));
12767
        /* e = mod - 2 */
12768
17.4k
        _sp_init_size(e, (sp_size_t)(m->used + 1));
12769
12770
        /* Create pre-computation results: ((2^(1..8))-1).a. */
12771
17.4k
        _sp_init_size(pre[0], (sp_size_t)(m->used * 2 + 1));
12772
        /* 1. pre[0] = 2^0 * a mod m
12773
         *    Start with 1.a = a.
12774
         */
12775
17.4k
        _sp_copy(a, pre[0]);
12776
        /* 2. For i in 2..CT_INV_MOD_PRE_CNT
12777
         *    For rest of entries in table.
12778
         */
12779
139k
        for (i = 1; (err == MP_OKAY) && (i < CT_INV_MOD_PRE_CNT); i++) {
12780
            /* 2.1 pre[i-1] = ((pre[i-1] ^ 2) * a) mod m */
12781
            /* Previous value ..1 -> ..10 */
12782
122k
            _sp_init_size(pre[i], (sp_size_t)(m->used * 2 + 1));
12783
122k
            err = sp_sqr(pre[i-1], pre[i]);
12784
122k
            if (err == MP_OKAY) {
12785
122k
                err = _sp_mont_red(pre[i], m, mp, 0);
12786
122k
            }
12787
            /* ..10 -> ..11 */
12788
122k
            if (err == MP_OKAY) {
12789
122k
                err = sp_mul(pre[i], a, pre[i]);
12790
122k
            }
12791
122k
            if (err == MP_OKAY) {
12792
122k
                err = _sp_mont_red(pre[i], m, mp, 0);
12793
122k
            }
12794
122k
        }
12795
17.4k
    }
12796
12797
17.5k
    if (err == MP_OKAY) {
12798
        /* 1. e = m - 2 */
12799
17.4k
        _sp_sub_d(m, 2, e);
12800
        /* 2. j = Count leading 1's up to CT_INV_MOD_PRE_CNT
12801
         *    One or more of the top bits is 1 so count.
12802
         */
12803
127k
        for (i = sp_count_bits(e)-2, j = 1; i >= 0; i--, j++) {
12804
127k
            if ((!sp_is_bit_set(e, (unsigned int)i)) ||
12805
125k
                    (j == CT_INV_MOD_PRE_CNT)) {
12806
17.3k
                break;
12807
17.3k
            }
12808
127k
        }
12809
        /* 3. Set tmp to product of leading bits. */
12810
17.4k
        _sp_copy(pre[j-1], t);
12811
12812
        /* 4. s = 0 */
12813
17.4k
        s = 0;
12814
        /* 5. j = 0 */
12815
17.4k
        j = 0;
12816
        /* 6. For i index of next top bit..0
12817
         *    Do remaining bits in exponent.
12818
         */
12819
4.59M
        for (; (err == MP_OKAY) && (i >= 0); i--) {
12820
            /* 6.1. bit = e[i] */
12821
4.58M
            int bit = sp_is_bit_set(e, (unsigned int)i);
12822
12823
            /* 6.2. j += bit
12824
             *      Update count of consecutive 1 bits.
12825
             */
12826
4.58M
            j += bit;
12827
            /* 6.3. s += 1
12828
             *      Update count of squares required.
12829
             */
12830
4.58M
            s++;
12831
12832
            /* 6.4. if j == CT_INV_MOD_PRE_CNT or (bit == 0 and j > 0)
12833
             *      Check if max 1 bits or 0 and have seen at least one 1 bit.
12834
             */
12835
4.58M
            if ((j == CT_INV_MOD_PRE_CNT) || ((!bit) && (j > 0))) {
12836
                /* 6.4.1. s -= 1 - bit */
12837
476k
                bit = 1 - bit;
12838
476k
                s -= bit;
12839
                /* 6.4.2. For s downto 1
12840
                 *        Do s squares.
12841
                 */
12842
5.02M
                for (; (err == MP_OKAY) && (s > 0); s--) {
12843
                    /* 6.4.2.1. t = (t ^ 2) mod m */
12844
4.54M
                    err = sp_sqr(t, t);
12845
4.54M
                    if (err == MP_OKAY) {
12846
4.54M
                        err = _sp_mont_red(t, m, mp, 0);
12847
4.54M
                    }
12848
4.54M
                }
12849
                /* 6.4.3. s = 1 - bit */
12850
476k
                s = bit;
12851
12852
                /* 6.4.4. t = (t * pre[j-1]) mod m */
12853
476k
                if (err == MP_OKAY) {
12854
475k
                    err = sp_mul(t, pre[j-1], t);
12855
475k
                }
12856
476k
                if (err == MP_OKAY) {
12857
475k
                    err = _sp_mont_red(t, m, mp, 0);
12858
475k
                }
12859
                /* 6.4.5. j = 0
12860
                 *        Reset number of 1 bits seen.
12861
                 */
12862
476k
                j = 0;
12863
476k
            }
12864
4.58M
        }
12865
17.4k
    }
12866
17.5k
    if (err == MP_OKAY) {
12867
        /* 7. For s downto 1
12868
         *    Do s squares - total remaining. */
12869
53.6k
        for (; (err == MP_OKAY) && (s > 0); s--) {
12870
            /* 7.1. t = (t ^ 2) mod m */
12871
36.3k
            err = sp_sqr(t, t);
12872
36.3k
            if (err == MP_OKAY) {
12873
36.2k
                err = _sp_mont_red(t, m, mp, 0);
12874
36.2k
            }
12875
36.3k
        }
12876
17.2k
    }
12877
17.5k
    if (err == MP_OKAY) {
12878
        /* 8. If j > 0 then r = (t * pre[j-1]) mod m */
12879
17.2k
        if (j > 0) {
12880
16.6k
            err = sp_mul(t, pre[j-1], r);
12881
16.6k
            if (err == MP_OKAY) {
12882
16.6k
                err = _sp_mont_red(r, m, mp, 0);
12883
16.6k
            }
12884
16.6k
        }
12885
        /* 9. Else r = t */
12886
634
        else {
12887
634
            _sp_copy(t, r);
12888
634
        }
12889
17.2k
    }
12890
12891
17.5k
#ifndef WOLFSSL_SP_NO_MALLOC
12892
17.5k
    FREE_DYN_SP_INT_ARRAY(pre, NULL);
12893
#else
12894
    FREE_SP_INT_ARRAY(pre, NULL);
12895
#endif
12896
17.5k
    return err;
12897
17.5k
}
12898
12899
/* Calculates the multiplicative inverse in the field - constant time.
12900
 *
12901
 * Modulus (m) must be a prime and greater than 2.
12902
 * For prime m, inv = a ^ (m-2) mod m as 1 = a ^ (m-1) mod m.
12903
 *
12904
 * @param  [in]   a   SP integer, Montgomery form, to find inverse of.
12905
 * @param  [in]   m   SP integer this is the modulus.
12906
 * @param  [out]  r   SP integer to hold result.
12907
 * @param  [in]   mp  SP integer digit that is the bottom digit of inv(-m).
12908
 *
12909
 * @return  MP_OKAY on success.
12910
 * @return  MP_VAL when a, m or r is NULL; a is 0 or m is less than 3.
12911
 * @return  MP_MEM when dynamic memory allocation fails.
12912
 */
12913
int sp_invmod_mont_ct(const sp_int* a, const sp_int* m, sp_int* r,
12914
    sp_int_digit mp)
12915
17.5k
{
12916
17.5k
    int err = MP_OKAY;
12917
12918
    /* Validate parameters. */
12919
17.5k
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
12920
0
        err = MP_VAL;
12921
0
    }
12922
    /* Ensure m is not too big. */
12923
17.5k
    else if (m->used * 2 >= SP_INT_DIGITS) {
12924
0
        err = MP_VAL;
12925
0
    }
12926
    /* check that r can hold the range of the modulus result */
12927
17.5k
    else if (m->used > r->size) {
12928
15
        err = MP_VAL;
12929
15
    }
12930
12931
    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
12932
17.5k
    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m) ||
12933
17.5k
            ((m->used == 1) && (m->dp[0] < 3)))) {
12934
36
        err = MP_VAL;
12935
36
    }
12936
12937
17.5k
    if (err == MP_OKAY) {
12938
        /* Do operation. */
12939
17.5k
        err = _sp_invmod_mont_ct(a, m, r, mp);
12940
17.5k
    }
12941
12942
17.5k
    return err;
12943
17.5k
}
12944
12945
#endif /* WOLFSSL_SP_INVMOD_MONT_CT */
12946
12947
12948
/**************************
12949
 * Exponentiation functions
12950
 **************************/
12951
12952
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
12953
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
12954
    defined(OPENSSL_ALL)
12955
12956
#ifndef WC_PROTECT_ENCRYPTED_MEM
12957
12958
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
12959
 * Process the exponent one bit at a time.
12960
 * Is constant time and can be cache attack resistant.
12961
 *
12962
 * Algorithm:
12963
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
12964
 *  1. s = 0
12965
 *  2. t[0] = b mod m.
12966
 *  3. t[1] = t[0]
12967
 *  4. For i in (bits-1)...0
12968
 *   4.1. t[s] = t[s] ^ 2
12969
 *   4.2. y = e[i]
12970
 *   4.3  j = y & s
12971
 *   4.4  s = s | y
12972
 *   4.5. t[j] = t[j] * b
12973
 *  5. r = t[1]
12974
 *
12975
 * @param  [in]   b     SP integer that is the base.
12976
 * @param  [in]   e     SP integer that is the exponent.
12977
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
12978
 *                      count of bits in e.
12979
 * @param  [in]   m     SP integer that is the modulus.
12980
 * @param  [out]  r     SP integer to hold result.
12981
 *
12982
 * @return  MP_OKAY on success.
12983
 * @return  MP_MEM when dynamic memory allocation fails.
12984
 */
12985
static int _sp_exptmod_ex(const sp_int* b, const sp_int* e, int bits,
12986
    const sp_int* m, sp_int* r)
12987
75.5k
{
12988
75.5k
    int i;
12989
75.5k
    int err = MP_OKAY;
12990
75.5k
    int done = 0;
12991
    /* 1. s = 0 */
12992
75.5k
    int s = 0;
12993
#ifdef WC_NO_CACHE_RESISTANT
12994
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 2);
12995
#else
12996
75.5k
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 3);
12997
75.5k
#endif
12998
12999
    /* Allocate temporaries. */
13000
#ifdef WC_NO_CACHE_RESISTANT
13001
    ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 2, err, NULL);
13002
#else
13003
    /* Working SP int needed when cache resistant. */
13004
75.5k
    ALLOC_SP_INT_ARRAY(t, 2U * m->used + 1U, 3, err, NULL);
13005
75.5k
#endif
13006
75.5k
    if (err == MP_OKAY) {
13007
        /* Initialize temporaries. */
13008
75.4k
        _sp_init_size(t[0], (sp_size_t)(m->used * 2 + 1));
13009
75.4k
        _sp_init_size(t[1], (sp_size_t)(m->used * 2 + 1));
13010
75.4k
    #ifndef WC_NO_CACHE_RESISTANT
13011
75.4k
        _sp_init_size(t[2], (sp_size_t)(m->used * 2 + 1));
13012
75.4k
    #endif
13013
13014
        /* 2. t[0] = b mod m
13015
         * Ensure base is less than modulus - set fake working value to base.
13016
         */
13017
75.4k
        if (_sp_cmp_abs(b, m) != MP_LT) {
13018
69
            err = sp_mod(b, m, t[0]);
13019
            /* Handle base == modulus. */
13020
69
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
13021
9
                _sp_set(r, 0);
13022
9
                done = 1;
13023
9
            }
13024
69
        }
13025
75.3k
        else {
13026
            /* Copy base into working variable. */
13027
75.3k
            _sp_copy(b, t[0]);
13028
75.3k
        }
13029
75.4k
    }
13030
13031
75.5k
    if ((!done) && (err == MP_OKAY)) {
13032
        /* 3. t[1] = t[0]
13033
         *    Set real working value to base.
13034
         */
13035
75.4k
        _sp_copy(t[0], t[1]);
13036
13037
        /* 4. For i in (bits-1)...0 */
13038
6.11M
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
13039
#ifdef WC_NO_CACHE_RESISTANT
13040
            /* 4.1. t[s] = t[s] ^ 2 */
13041
            err = sp_sqrmod(t[s], m, t[s]);
13042
            if (err == MP_OKAY) {
13043
                /* 4.2. y = e[i] */
13044
                int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
13045
                /* 4.3. j = y & s */
13046
                int j = y & s;
13047
                /* 4.4  s = s | y */
13048
                s |= y;
13049
                /* 4.5. t[j] = t[j] * b */
13050
                err = _sp_mulmod(t[j], b, m, t[j]);
13051
            }
13052
#else
13053
            /* 4.1. t[s] = t[s] ^ 2 */
13054
6.03M
            _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
13055
6.03M
                               ((size_t)t[1] & sp_off_on_addr[s  ])),
13056
6.03M
                     t[2]);
13057
6.03M
            err = sp_sqrmod(t[2], m, t[2]);
13058
6.03M
            _sp_copy(t[2],
13059
6.03M
                     (sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
13060
6.03M
                               ((size_t)t[1] & sp_off_on_addr[s  ])));
13061
13062
6.03M
            if (err == MP_OKAY) {
13063
                /* 4.2. y = e[i] */
13064
6.03M
                int y = (int)((e->dp[i >> SP_WORD_SHIFT] >> (i & (int)SP_WORD_MASK)) & 1);
13065
                /* 4.3. j = y & s */
13066
6.03M
                int j = y & s;
13067
                /* 4.4  s = s | y */
13068
6.03M
                s |= y;
13069
                /* 4.5. t[j] = t[j] * b */
13070
6.03M
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
13071
6.03M
                                   ((size_t)t[1] & sp_off_on_addr[j  ])),
13072
6.03M
                         t[2]);
13073
6.03M
                err = _sp_mulmod(t[2], b, m, t[2]);
13074
6.03M
                _sp_copy(t[2],
13075
6.03M
                         (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
13076
6.03M
                                   ((size_t)t[1] & sp_off_on_addr[j  ])));
13077
6.03M
            }
13078
6.03M
#endif
13079
6.03M
        }
13080
75.4k
    }
13081
75.5k
    if ((!done) && (err == MP_OKAY)) {
13082
        /* 5. r = t[1] */
13083
75.2k
        _sp_copy(t[1], r);
13084
75.2k
    }
13085
13086
75.5k
    FREE_SP_INT_ARRAY(t, NULL);
13087
75.5k
    return err;
13088
75.5k
}
13089
13090
#else
13091
13092
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13093
 * Process the exponent one bit at a time with base in Montgomery form.
13094
 * Is constant time and cache attack resistant.
13095
 *
13096
 * Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
13097
 * Cryptographic Hardware and Embedded Systems, CHES 2002
13098
 *
13099
 * Algorithm:
13100
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13101
 *  1. t[1] = b mod m.
13102
 *  2. t[0] = 1
13103
 *  3. For i in (bits-1)...0
13104
 *   3.1. y = e[i]
13105
 *   3.2. t[2] = t[0] * t[1]
13106
 *   3.3. t[3] = t[y] ^ 2
13107
 *   3.4. t[y] = t[3], t[y^1] = t[2]
13108
 *  4. r = t[0]
13109
 *
13110
 * @param  [in]   b     SP integer that is the base.
13111
 * @param  [in]   e     SP integer that is the exponent.
13112
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
13113
 *                      count of bits in e.
13114
 * @param  [in]   m     SP integer that is the modulus.
13115
 * @param  [out]  r     SP integer to hold result.
13116
 *
13117
 * @return  MP_OKAY on success.
13118
 * @return  MP_MEM when dynamic memory allocation fails.
13119
 */
13120
static int _sp_exptmod_ex(const sp_int* b, const sp_int* e, int bits,
13121
    const sp_int* m, sp_int* r)
13122
{
13123
    int err = MP_OKAY;
13124
    int done = 0;
13125
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);
13126
13127
    /* Allocate temporaries. */
13128
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
13129
    if (err == MP_OKAY) {
13130
        /* Initialize temporaries. */
13131
        _sp_init_size(t[0], m->used * 2 + 1);
13132
        _sp_init_size(t[1], m->used * 2 + 1);
13133
        _sp_init_size(t[2], m->used * 2 + 1);
13134
        _sp_init_size(t[3], m->used * 2 + 1);
13135
13136
        /* 1. Ensure base is less than modulus. */
13137
        if (_sp_cmp_abs(b, m) != MP_LT) {
13138
            err = sp_mod(b, m, t[1]);
13139
            /* Handle base == modulus. */
13140
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
13141
                _sp_set(r, 0);
13142
                done = 1;
13143
            }
13144
        }
13145
        else {
13146
            /* Copy base into working variable. */
13147
            err = sp_copy(b, t[1]);
13148
        }
13149
    }
13150
13151
    if ((!done) && (err == MP_OKAY)) {
13152
        int i;
13153
13154
        /* 2. t[0] = 1 */
13155
        _sp_set(t[0], 1);
13156
13157
        /* 3. For i in (bits-1)...0 */
13158
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
13159
            /* 3.1. y = e[i] */
13160
            int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
13161
13162
            /* 3.2. t[2] = t[0] * t[1] */
13163
            err = sp_mulmod(t[0], t[1], m, t[2]);
13164
            /* 3.3. t[3] = t[y] ^ 2 */
13165
            if (err == MP_OKAY) {
13166
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[y^1]) +
13167
                                   ((size_t)t[1] & sp_off_on_addr[y  ])),
13168
                         t[3]);
13169
                err = sp_sqrmod(t[3], m, t[3]);
13170
            }
13171
            /* 3.4. t[y] = t[3], t[y^1] = t[2] */
13172
            if (err == MP_OKAY) {
13173
                _sp_copy_2_ct(t[2], t[3], t[0], t[1], y, m->used);
13174
            }
13175
        }
13176
    }
13177
    if ((!done) && (err == MP_OKAY)) {
13178
        /* 4. r = t[0] */
13179
        err = sp_copy(t[0], r);
13180
    }
13181
13182
    FREE_SP_INT_ARRAY(t, NULL);
13183
    return err;
13184
}
13185
13186
#endif /* WC_PROTECT_ENCRYPTED_MEM */
13187
13188
#endif
13189
13190
#if (defined(WOLFSSL_SP_MATH_ALL) && ((!defined(WOLFSSL_RSA_VERIFY_ONLY) && \
13191
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH))) || \
13192
    defined(OPENSSL_ALL)
13193
#ifndef WC_NO_HARDEN
13194
#if !defined(WC_NO_CACHE_RESISTANT)
13195
13196
#ifndef WC_PROTECT_ENCRYPTED_MEM
13197
13198
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13199
 * Process the exponent one bit at a time with base in Montgomery form.
13200
 * Is constant time and cache attack resistant.
13201
 *
13202
 * Algorithm:
13203
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13204
 *  1. t[0] = b mod m.
13205
 *  2. s = 0
13206
 *  3. t[0] = ToMont(t[0])
13207
 *  4. t[1] = t[0]
13208
 *  5. bm = t[0]
13209
 *  6. For i in (bits-1)...0
13210
 *   6.1. t[s] = t[s] ^ 2
13211
 *   6.2. y = e[i]
13212
 *   6.3  j = y & s
13213
 *   6.4  s = s | y
13214
 *   6.5. t[j] = t[j] * bm
13215
 *  7. t[1] = FromMont(t[1])
13216
 *  8. r = t[1]
13217
 *
13218
 * @param  [in]   b     SP integer that is the base.
13219
 * @param  [in]   e     SP integer that is the exponent.
13220
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
13221
 *                      count of bits in e.
13222
 * @param  [in]   m     SP integer that is the modulus.
13223
 * @param  [out]  r     SP integer to hold result.
13224
 *
13225
 * @return  MP_OKAY on success.
13226
 * @return  MP_MEM when dynamic memory allocation fails.
13227
 */
13228
static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
13229
    const sp_int* m, sp_int* r)
13230
742k
{
13231
742k
    int err = MP_OKAY;
13232
742k
    int done = 0;
13233
742k
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);
13234
13235
    /* Allocate temporaries. */
13236
742k
    ALLOC_SP_INT_ARRAY(t, m->used * 2U + 1U, 4, err, NULL);
13237
742k
    if (err == MP_OKAY) {
13238
        /* Initialize temporaries. */
13239
742k
        _sp_init_size(t[0], (sp_size_t)(m->used * 2 + 1));
13240
742k
        _sp_init_size(t[1], (sp_size_t)(m->used * 2 + 1));
13241
742k
        _sp_init_size(t[2], (sp_size_t)(m->used * 2 + 1));
13242
742k
        _sp_init_size(t[3], (sp_size_t)(m->used * 2 + 1));
13243
13244
        /* 1. Ensure base is less than modulus. */
13245
742k
        if (_sp_cmp_abs(b, m) != MP_LT) {
13246
0
            err = sp_mod(b, m, t[0]);
13247
            /* Handle base == modulus. */
13248
0
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
13249
0
                _sp_set(r, 0);
13250
0
                done = 1;
13251
0
            }
13252
0
        }
13253
742k
        else {
13254
            /* Copy base into working variable. */
13255
742k
            _sp_copy(b, t[0]);
13256
742k
        }
13257
742k
    }
13258
13259
742k
    if ((!done) && (err == MP_OKAY)) {
13260
742k
        int i;
13261
        /* 2. s = 0 */
13262
742k
        int s = 0;
13263
742k
        sp_int_digit mp;
13264
13265
        /* Calculate Montgomery multiplier for reduction. */
13266
742k
        _sp_mont_setup(m, &mp);
13267
        /* 3. t[0] = ToMont(t[0])
13268
         *    Convert base to Montgomery form - as fake working value.
13269
         */
13270
742k
        err = sp_mont_norm(t[1], m);
13271
742k
        if (err == MP_OKAY) {
13272
742k
            err = sp_mul(t[0], t[1], t[0]);
13273
742k
        }
13274
742k
        if (err == MP_OKAY) {
13275
            /* t[0] = t[0] mod m, temporary size has to be bigger than t[0]. */
13276
742k
            err = _sp_div(t[0], m, NULL, t[0], t[0]->used + 1U);
13277
742k
        }
13278
742k
        if (err == MP_OKAY) {
13279
            /* 4. t[1] = t[0]
13280
             *    Set real working value to base.
13281
             */
13282
742k
            _sp_copy(t[0], t[1]);
13283
            /* 5. bm = t[0]. */
13284
742k
            _sp_copy(t[0], t[2]);
13285
742k
        }
13286
13287
        /* 6. For i in (bits-1)...0 */
13288
67.5M
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
13289
            /* 6.1. t[s] = t[s] ^ 2 */
13290
66.7M
            _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
13291
66.7M
                               ((size_t)t[1] & sp_off_on_addr[s  ])),
13292
66.7M
                     t[3]);
13293
66.7M
            err = sp_sqr(t[3], t[3]);
13294
66.7M
            if (err == MP_OKAY) {
13295
66.7M
                err = _sp_mont_red(t[3], m, mp, 0);
13296
66.7M
            }
13297
66.7M
            _sp_copy(t[3],
13298
66.7M
                     (sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
13299
66.7M
                               ((size_t)t[1] & sp_off_on_addr[s  ])));
13300
13301
66.7M
            if (err == MP_OKAY) {
13302
                /* 6.2. y = e[i] */
13303
66.7M
                int y = (int)((e->dp[i >> SP_WORD_SHIFT] >> (i & (int)SP_WORD_MASK)) & 1);
13304
                /* 6.3  j = y & s */
13305
66.7M
                int j = y & s;
13306
                /* 6.4  s = s | y */
13307
66.7M
                s |= y;
13308
13309
                /* 6.5. t[j] = t[j] * bm */
13310
66.7M
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
13311
66.7M
                                   ((size_t)t[1] & sp_off_on_addr[j  ])),
13312
66.7M
                         t[3]);
13313
66.7M
                err = sp_mul(t[3], t[2], t[3]);
13314
66.7M
                if (err == MP_OKAY) {
13315
66.7M
                    err = _sp_mont_red(t[3], m, mp, 0);
13316
66.7M
                }
13317
66.7M
                _sp_copy(t[3],
13318
66.7M
                         (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
13319
66.7M
                                   ((size_t)t[1] & sp_off_on_addr[j  ])));
13320
66.7M
            }
13321
66.7M
        }
13322
742k
        if (err == MP_OKAY) {
13323
            /* 7. t[1] = FromMont(t[1]) */
13324
741k
            err = _sp_mont_red(t[1], m, mp, 0);
13325
            /* Reduction implementation returns number to range: 0..m-1. */
13326
741k
        }
13327
742k
    }
13328
742k
    if ((!done) && (err == MP_OKAY)) {
13329
        /* 8. r = t[1] */
13330
741k
        _sp_copy(t[1], r);
13331
741k
    }
13332
13333
742k
    FREE_SP_INT_ARRAY(t, NULL);
13334
742k
    return err;
13335
742k
}
13336
13337
#else
13338
13339
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13340
 * Process the exponent one bit at a time with base in Montgomery form.
13341
 * Is constant time and cache attack resistant.
13342
 *
13343
 * Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
13344
 * Cryptographic Hardware and Embedded Systems, CHES 2002
13345
 *
13346
 * Algorithm:
13347
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13348
 *  1. t[1] = b mod m.
13349
 *  2. t[0] = ToMont(1)
13350
 *  3. t[1] = ToMont(t[1])
13351
 *  4. For i in (bits-1)...0
13352
 *   4.1. y = e[i]
13353
 *   4.2. t[2] = t[0] * t[1]
13354
 *   4.3. t[3] = t[y] ^ 2
13355
 *   4.4. t[y] = t[3], t[y^1] = t[2]
13356
 *  5. t[0] = FromMont(t[0])
13357
 *  6. r = t[0]
13358
 *
13359
 * @param  [in]   b     SP integer that is the base.
13360
 * @param  [in]   e     SP integer that is the exponent.
13361
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
13362
 *                      count of bits in e.
13363
 * @param  [in]   m     SP integer that is the modulus.
13364
 * @param  [out]  r     SP integer to hold result.
13365
 *
13366
 * @return  MP_OKAY on success.
13367
 * @return  MP_MEM when dynamic memory allocation fails.
13368
 */
13369
static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
13370
    const sp_int* m, sp_int* r)
13371
{
13372
    int err = MP_OKAY;
13373
    int done = 0;
13374
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);
13375
13376
    /* Allocate temporaries. */
13377
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
13378
    if (err == MP_OKAY) {
13379
        /* Initialize temporaries. */
13380
        _sp_init_size(t[0], m->used * 2 + 1);
13381
        _sp_init_size(t[1], m->used * 2 + 1);
13382
        _sp_init_size(t[2], m->used * 2 + 1);
13383
        _sp_init_size(t[3], m->used * 2 + 1);
13384
13385
        /* 1. Ensure base is less than modulus. */
13386
        if (_sp_cmp_abs(b, m) != MP_LT) {
13387
            err = sp_mod(b, m, t[1]);
13388
            /* Handle base == modulus. */
13389
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
13390
                _sp_set(r, 0);
13391
                done = 1;
13392
            }
13393
        }
13394
        else {
13395
            /* Copy base into working variable. */
13396
            err = sp_copy(b, t[1]);
13397
        }
13398
    }
13399
13400
    if ((!done) && (err == MP_OKAY)) {
13401
        int i;
13402
        sp_int_digit mp;
13403
13404
        /* Calculate Montgomery multiplier for reduction. */
13405
        _sp_mont_setup(m, &mp);
13406
        /* 2. t[0] = ToMont(1)
13407
          *    Calculate 1 in Montgomery form.
13408
          */
13409
        err = sp_mont_norm(t[0], m);
13410
        if (err == MP_OKAY) {
13411
            /* 3. t[1] = ToMont(t[1])
13412
             *    Convert base to Montgomery form.
13413
             */
13414
            err = sp_mulmod(t[1], t[0], m, t[1]);
13415
        }
13416
13417
        /* 4. For i in (bits-1)...0 */
13418
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
13419
            /* 4.1. y = e[i] */
13420
            int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
13421
13422
            /* 4.2. t[2] = t[0] * t[1] */
13423
            err = sp_mul(t[0], t[1], t[2]);
13424
            if (err == MP_OKAY) {
13425
                err = _sp_mont_red(t[2], m, mp, 0);
13426
            }
13427
            /* 4.3. t[3] = t[y] ^ 2 */
13428
            if (err == MP_OKAY) {
13429
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[y^1]) +
13430
                                   ((size_t)t[1] & sp_off_on_addr[y  ])),
13431
                         t[3]);
13432
                err = sp_sqr(t[3], t[3]);
13433
            }
13434
            if (err == MP_OKAY) {
13435
                err = _sp_mont_red(t[3], m, mp, 0);
13436
            }
13437
            /* 4.4. t[y] = t[3], t[y^1] = t[2] */
13438
            if (err == MP_OKAY) {
13439
                _sp_copy_2_ct(t[2], t[3], t[0], t[1], y, m->used);
13440
            }
13441
        }
13442
13443
        if (err == MP_OKAY) {
13444
            /* 5. t[0] = FromMont(t[0]) */
13445
            err = _sp_mont_red(t[0], m, mp, 0);
13446
            /* Reduction implementation returns number to range: 0..m-1. */
13447
        }
13448
    }
13449
    if ((!done) && (err == MP_OKAY)) {
13450
        /* 6. r = t[0] */
13451
        err = sp_copy(t[0], r);
13452
    }
13453
13454
    FREE_SP_INT_ARRAY(t, NULL);
13455
    return err;
13456
}
13457
13458
#endif /* WC_PROTECT_ENCRYPTED_MEM */
13459
13460
#else
13461
13462
#ifdef SP_ALLOC
13463
#define SP_ALLOC_PREDEFINED
13464
#endif
13465
/* Always allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */
13466
#define SP_ALLOC
13467
13468
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13469
 * Creates a window of precalculated exponents with base in Montgomery form.
13470
 * Is constant time but NOT cache attack resistant.
13471
 *
13472
 * Algorithm:
13473
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13474
 *  w: window size based on bits.
13475
 *  1. t[1] = b mod m.
13476
 *  2. t[0] = MontNorm(m) = ToMont(1)
13477
 *  3. t[1] = ToMont(t[1])
13478
 *  4. For i in 2..(2 ^ w) - 1
13479
 *   4.1 if i[0] == 0 then t[i] = t[i/2] ^ 2
13480
 *   4.2 if i[0] == 1 then t[i] = t[i-1] * t[1]
13481
 *  5. cb = w * (bits / w)
13482
 *  5. tr = t[e / (2 ^ cb)]
13483
 *  6. For i in cb..w
13484
 *   6.1. y = e[(i-1)..(i-w)]
13485
 *   6.2. tr = tr ^ (2 * w)
13486
 *   6.3. tr = tr * t[y]
13487
 *  7. tr = FromMont(tr)
13488
 *  8. r = tr
13489
 *
13490
 * @param  [in]   b     SP integer that is the base.
13491
 * @param  [in]   e     SP integer that is the exponent.
13492
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
13493
 *                      count of bits in e.
13494
 * @param  [in]   m     SP integer that is the modulus.
13495
 * @param  [out]  r     SP integer to hold result.
13496
 *
13497
 * @return  MP_OKAY on success.
13498
 * @return  MP_MEM when dynamic memory allocation fails.
13499
 */
13500
static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
13501
    const sp_int* m, sp_int* r)
13502
{
13503
    int i;
13504
    int c;
13505
    int y;
13506
    int winBits;
13507
    int preCnt;
13508
    int err = MP_OKAY;
13509
    int done = 0;
13510
    sp_int_digit mask;
13511
    sp_int* tr = NULL;
13512
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 6) + 1);
13513
13514
    /* Window bits based on number of pre-calculations versus number of loop
13515
     * calculations.
13516
     * Exponents for RSA and DH will result in 6-bit windows.
13517
     */
13518
    if (bits > 450) {
13519
        winBits = 6;
13520
    }
13521
    else if (bits <= 21) {
13522
        winBits = 1;
13523
    }
13524
    else if (bits <= 36) {
13525
        winBits = 3;
13526
    }
13527
    else if (bits <= 140) {
13528
        winBits = 4;
13529
    }
13530
    else {
13531
        winBits = 5;
13532
    }
13533
    /* An entry for each possible 0..2^winBits-1 value. */
13534
    preCnt = 1 << winBits;
13535
    /* Mask for calculating index into pre-computed table. */
13536
    mask = preCnt - 1;
13537
13538
    /* Allocate sp_ints for:
13539
     *  - pre-computation table
13540
     *  - temporary result
13541
     */
13542
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 1, err, NULL);
13543
    if (err == MP_OKAY) {
13544
        /* Temporary result uses the entry allocated after the table. */
13545
        tr = t[preCnt];
13546
13547
        /* Initialize all allocated. */
13548
        for (i = 0; i < preCnt; i++) {
13549
            _sp_init_size(t[i], m->used * 2 + 1);
13550
        }
13551
        _sp_init_size(tr, m->used * 2 + 1);
13552
13553
        /* 1. t[1] = b mod m. */
13554
        if (_sp_cmp_abs(b, m) != MP_LT) {
13555
            err = sp_mod(b, m, t[1]);
13556
            /* Base is a multiple of the modulus: result is zero. */
13557
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
13558
                _sp_set(r, 0);
13559
                done = 1;
13560
            }
13561
        }
13562
        else {
13563
            /* Copy base into entry of table to contain b^1. */
13564
            _sp_copy(b, t[1]);
13565
        }
13566
    }
13567
13568
    if ((!done) && (err == MP_OKAY)) {
13569
        sp_int_digit mp;
13570
        sp_int_digit n;
13571
13572
        /* Calculate Montgomery multiplier for reduction. */
13573
        _sp_mont_setup(m, &mp);
13574
        /* 2. t[0] = MontNorm(m) = ToMont(1) */
13575
        err = sp_mont_norm(t[0], m);
13576
        if (err == MP_OKAY) {
13577
            /* 3. t[1] = ToMont(t[1]) */
13578
            err = sp_mul(t[1], t[0], t[1]);
13579
        }
13580
        if (err == MP_OKAY) {
13581
            /* t[1] = t[1] mod m, temporary size has to be bigger than t[1]. */
13582
            err = _sp_div(t[1], m, NULL, t[1], t[1]->used + 1);
13583
        }
13584
13585
        /* 4. For i in 2..(2 ^ w) - 1 */
13586
        for (i = 2; (i < preCnt) && (err == MP_OKAY); i++) {
13587
            /* 4.1 if i[0] == 0 then t[i] = t[i/2] ^ 2 */
13588
            if ((i & 1) == 0) {
13589
                err = sp_sqr(t[i/2], t[i]);
13590
            }
13591
            /* 4.2 if i[0] == 1 then t[i] = t[i-1] * t[1] */
13592
            else {
13593
                err = sp_mul(t[i-1], t[1], t[i]);
13594
            }
13595
            /* Montgomery reduce square or multiplication result. */
13596
            if (err == MP_OKAY) {
13597
                err = _sp_mont_red(t[i], m, mp, 0);
13598
            }
13599
        }
13600
13601
        if (err == MP_OKAY) {
13602
            /* 5. cb = w * (bits / w) */
13603
            i = (bits - 1) >> SP_WORD_SHIFT;
13604
            n = e->dp[i--];
13605
            /* Find top bit index in last word. */
13606
            c = bits & (SP_WORD_SIZE - 1);
13607
            if (c == 0) {
13608
                c = SP_WORD_SIZE;
13609
            }
13610
            /* Use as many bits from top to make remaining a multiple of window
13611
             * size.
13612
             */
13613
            /* NOTE(review): c would go negative if the top word held fewer
             * bits than are subtracted here. The caller visible in this file
             * passes a multiple of SP_WORD_SIZE, so c starts at SP_WORD_SIZE -
             * confirm for any other callers.
             */
            if ((bits % winBits) != 0) {
13614
                c -= bits % winBits;
13615
            }
13616
            else {
13617
                c -= winBits;
13618
            }
13619
13620
            /* 5. tr = t[e / (2 ^ cb)] */
13621
            y = (int)(n >> c);
13622
            /* Shift unused bits to the top of the word for the next window. */
            n <<= SP_WORD_SIZE - c;
13623
            /* 5. Copy table value for first window. */
13624
            _sp_copy(t[y], tr);
13625
13626
            /* 6. For i in cb..w */
13627
            for (; (i >= 0) || (c >= winBits); ) {
13628
                int j;
13629
13630
                /* 6.1. y = e[(i-1)..(i-w)] */
13631
                if (c == 0) {
13632
                    /* Bits up to end of digit */
13633
                    n = e->dp[i--];
13634
                    y = (int)(n >> (SP_WORD_SIZE - winBits));
13635
                    n <<= winBits;
13636
                    c = SP_WORD_SIZE - winBits;
13637
                }
13638
                else if (c < winBits) {
13639
                    /* Bits to end of digit and part of next */
13640
                    y = (int)(n >> (SP_WORD_SIZE - winBits));
13641
                    n = e->dp[i--];
13642
                    c = winBits - c;
13643
                    y |= (int)(n >> (SP_WORD_SIZE - c));
13644
                    n <<= c;
13645
                    c = SP_WORD_SIZE - c;
13646
                }
13647
                else {
13648
                    /* Bits from middle of digit */
13649
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
13650
                    n <<= winBits;
13651
                    c -= winBits;
13652
                }
13653
13654
                /* 6.2. tr = tr ^ (2 ^ w) - square once per window bit. */
13655
                for (j = 0; (j < winBits) && (err == MP_OKAY); j++) {
13656
                    err = sp_sqr(tr, tr);
13657
                    if (err == MP_OKAY) {
13658
                        err = _sp_mont_red(tr, m, mp, 0);
13659
                    }
13660
                }
13661
13662
                /* 6.3. tr = tr * t[y] */
13663
                if (err == MP_OKAY) {
13664
                    err = sp_mul(tr, t[y], tr);
13665
                }
13666
                if (err == MP_OKAY) {
13667
                    err = _sp_mont_red(tr, m, mp, 0);
13668
                }
13669
            }
13670
        }
13671
13672
        if (err == MP_OKAY) {
13673
            /* 7. tr = FromMont(tr) */
13674
            err = _sp_mont_red(tr, m, mp, 0);
13675
            /* Reduction implementation returns number to range: 0..m-1. */
13676
        }
13677
    }
13678
    if ((!done) && (err == MP_OKAY)) {
13679
        /* 8. r = tr */
13680
        _sp_copy(tr, r);
13681
    }
13682
13683
    FREE_SP_INT_ARRAY(t, NULL);
13684
    return err;
13685
}
13686
13687
#ifndef SP_ALLOC_PREDEFINED
13688
#undef SP_ALLOC
13689
#undef SP_ALLOC_PREDEFINED
13690
#endif
13691
13692
#endif /* !WC_NO_CACHE_RESISTANT */
13693
#endif /* !WC_NO_HARDEN */
13694
13695
/* Window size in bits for base 2 exponentiation: w = Log2(SP_WORD_SIZE) - 1 */
13696
#if SP_WORD_SIZE == 8
13697
49.6k
    #define EXP2_WINSIZE    2
13698
#elif SP_WORD_SIZE == 16
13699
    #define EXP2_WINSIZE    3
13700
#elif SP_WORD_SIZE == 32
13701
    #define EXP2_WINSIZE    4
13702
#elif SP_WORD_SIZE == 64
13703
    #define EXP2_WINSIZE    5
13704
#else
13705
    #error "sp_exptmod_base_2: Unexpected SP_WORD_SIZE"
13706
#endif
13707
/* Mask with all bits of the window set - extracts one window from a digit. */
13708
5.85k
#define EXP2_MASK           ((1 << EXP2_WINSIZE) - 1)
13709
13710
/* Internal. Exponentiates 2 to the power of e modulo m into r: r = 2 ^ e mod m
13711
 * Is constant time and cache attack resistant.
13712
 *
13713
 * Calculates value to make mod operations constant time except when
13714
 * WC_NO_HARDEN defined or modulus fits in one word.
13715
 *
13716
 * Algorithm:
13717
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13718
 *  w: window size based on #bits in word.
13719
 *  1. if Words(m) > 1 then tr = MontNorm(m) = ToMont(1)
13720
 *     else                 tr = 1
13721
 *  2. if Words(m) > 1 and HARDEN then a = m * (2 ^ (2^w))
13722
 *     else                            a = 0
13723
 *  3. cb = w * (bits / w)
13724
 *  4. y = e / (2 ^ cb)
13725
 *  5. tr = (tr * (2 ^ y) + a) mod m
13726
 *  6. For i in cb..w
13727
 *   6.1. y = e[(i-1)..(i-w)]
13728
 *   6.2. tr = tr ^ (2 ^ w)
13729
 *   6.3. tr = (tr * (2 ^ y) + a) mod m
13730
 *  7. if Words(m) > 1 then tr = FromMont(tr)
13731
 *  8. r = tr
13732
 *
13733
 * @param  [in]   e       SP integer that is the exponent.
13734
 * @param  [in]   digits  Number of digits in exponent to use. May be greater
13735
 *                        than count of digits in e.
13736
 * @param  [in]   m       SP integer that is the modulus.
13737
 * @param  [out]  r       SP integer to hold result.
13738
 *
13739
 * @return  MP_OKAY on success.
13740
 * @return  MP_MEM when dynamic memory allocation fails.
13741
 */
13742
static int _sp_exptmod_base_2(const sp_int* e, int digits, const sp_int* m,
13743
    sp_int* r)
13744
583
{
13745
583
    int i = 0;
13746
583
    int c = 0;
13747
583
    int y;
13748
583
    int err = MP_OKAY;
13749
583
    sp_int_digit mp = 0;
13750
583
    sp_int_digit n = 0;
13751
583
#ifndef WC_NO_HARDEN
13752
583
    sp_int* a = NULL;
13753
583
    sp_int* tr = NULL;
13754
583
    DECL_SP_INT_ARRAY(d, m->used * 2 + 1, 2);
13755
#else
13756
    DECL_SP_INT(tr, m->used * 2 + 1);
13757
#endif
13758
583
    /* Montgomery form is only used when the modulus has more than one word. */
    int useMont = (m->used > 1);
13759
13760
#if 0
13761
    sp_print_int(2, "a");
13762
    sp_print(e, "b");
13763
    sp_print(m, "m");
13764
#endif
13765
13766
583
#ifndef WC_NO_HARDEN
13767
    /* Allocate sp_ints for:
13768
     *  - constant time add value for mod operation
13769
     *  - temporary result
13770
     */
13771
583
    ALLOC_SP_INT_ARRAY(d, m->used * 2U + 1U, 2, err, NULL);
13772
#else
13773
    /* Allocate sp_int for temporary result. */
13774
    ALLOC_SP_INT(tr, m->used * 2U + 1U, err, NULL);
13775
#endif
13776
583
    if (err == MP_OKAY) {
13777
582
    #ifndef WC_NO_HARDEN
13778
582
        a  = d[0];
13779
582
        tr = d[1];
13780
13781
582
        _sp_init_size(a, (sp_size_t)(m->used * 2 + 1));
13782
582
    #endif
13783
582
        _sp_init_size(tr, (sp_size_t)(m->used * 2 + 1));
13784
13785
582
    }
13786
13787
583
    if ((err == MP_OKAY) && useMont) {
13788
        /* Calculate Montgomery multiplier for reduction. */
13789
517
        _sp_mont_setup(m, &mp);
13790
517
    }
13791
583
    if (err == MP_OKAY) {
13792
        /* 1. if Words(m) > 1 then tr = MontNorm(m) = ToMont(1)
13793
         *    else                 tr = 1
13794
         */
13795
582
        if (useMont) {
13796
            /* Calculate Montgomery normalizer for modulus - 1 in Montgomery
13797
             * form.
13798
             */
13799
517
            err = sp_mont_norm(tr, m);
13800
517
        }
13801
65
        else {
13802
             /* For single word modulus don't use Montgomery form. */
13803
65
            err = sp_set(tr, 1);
13804
65
        }
13805
582
    }
13806
    /* 2. if Words(m) > 1 and HARDEN then a = m * (2 ^ (2^w))
13807
     *    else                            a = 0
13808
     */
13809
583
#ifndef WC_NO_HARDEN
13810
583
    if ((err == MP_OKAY) && useMont) {
13811
517
        err = sp_mul_2d(m, 1 << EXP2_WINSIZE, a);
13812
517
    }
13813
583
#endif
13814
13815
583
    if (err == MP_OKAY) {
13816
        /* 3. cb = w * (bits / w) */
        /* NOTE(review): reads e->dp[digits - 1]; assumes digits >= 1 -
         * confirm callers never pass 0.
         */
13817
582
        i = digits - 1;
13818
582
        n = e->dp[i--];
13819
582
        c = SP_WORD_SIZE;
13820
582
    #if EXP2_WINSIZE != 1
13821
582
        c -= (digits * SP_WORD_SIZE) % EXP2_WINSIZE;
13822
582
        if (c != SP_WORD_SIZE) {
13823
            /* 4. y = e / (2 ^ cb) */
13824
0
            y = (int)(n >> c);
13825
0
            n <<= SP_WORD_SIZE - c;
13826
0
        }
13827
582
        else
13828
582
    #endif
13829
582
        {
13830
            /* 4. y = e / (2 ^ cb) */
13831
582
            y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) & EXP2_MASK);
13832
582
            n <<= EXP2_WINSIZE;
13833
582
            c -= EXP2_WINSIZE;
13834
582
        }
13835
13836
        /* 5. tr = (tr * (2 ^ y) + a) mod m */
13837
582
        err = sp_mul_2d(tr, y, tr);
13838
582
    }
13839
583
#ifndef WC_NO_HARDEN
13840
583
    if ((err == MP_OKAY) && useMont) {
13841
        /* Add value to make mod operation constant time. */
13842
517
        err = sp_add(tr, a, tr);
13843
517
    }
13844
583
#endif
13845
583
    if (err == MP_OKAY) {
13846
582
        err = sp_mod(tr, m, tr);
13847
582
    }
13848
    /* 6. For i in cb..w */
13849
7.03k
    for (; (err == MP_OKAY) && ((i >= 0) || (c >= EXP2_WINSIZE)); ) {
13850
6.45k
        int j;
13851
13852
        /* 6.1. y = e[(i-1)..(i-w)] */
13853
6.45k
        if (c == 0) {
13854
            /* Bits from next digit. */
13855
1.18k
            n = e->dp[i--];
13856
1.18k
            y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
13857
1.18k
            n <<= EXP2_WINSIZE;
13858
1.18k
            c = SP_WORD_SIZE - EXP2_WINSIZE;
13859
1.18k
        }
13860
    #if (EXP2_WINSIZE != 1) && (EXP2_WINSIZE != 2) && (EXP2_WINSIZE != 4)
13861
        else if (c < EXP2_WINSIZE) {
13862
            /* Bits to end of digit and part of next */
13863
            y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
13864
            n = e->dp[i--];
13865
            c = EXP2_WINSIZE - c;
13866
            y |= (int)(n >> (SP_WORD_SIZE - c));
13867
            n <<= c;
13868
            c = SP_WORD_SIZE - c;
13869
        }
13870
    #endif
13871
5.27k
        else {
13872
            /* Bits from middle of digit */
13873
5.27k
            y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) & EXP2_MASK);
13874
5.27k
            n <<= EXP2_WINSIZE;
13875
5.27k
            c -= EXP2_WINSIZE;
13876
5.27k
        }
13877
13878
        /* 6.2. tr = tr ^ (2 ^ w) - square once per window bit. */
13879
19.3k
        for (j = 0; (j < EXP2_WINSIZE) && (err == MP_OKAY); j++) {
13880
12.8k
            err = sp_sqr(tr, tr);
13881
12.8k
            if (err == MP_OKAY) {
13882
12.8k
                if (useMont) {
13883
5.91k
                    err = _sp_mont_red(tr, m, mp, 0);
13884
5.91k
                }
13885
6.97k
                else {
13886
6.97k
                    err = sp_mod(tr, m, tr);
13887
6.97k
                }
13888
12.8k
            }
13889
12.8k
        }
13890
13891
        /* 6.3. tr = (tr * (2 ^ y) + a) mod m */
13892
6.45k
        if (err == MP_OKAY) {
13893
6.43k
            err = sp_mul_2d(tr, y, tr);
13894
6.43k
        }
13895
6.45k
    #ifndef WC_NO_HARDEN
13896
6.45k
        if ((err == MP_OKAY) && useMont) {
13897
            /* Add value to make mod operation constant time. */
13898
2.95k
            err = sp_add(tr, a, tr);
13899
2.95k
        }
13900
6.45k
    #endif
13901
6.45k
        if (err == MP_OKAY) {
13902
            /* Reduce current result by modulus. */
13903
6.43k
            err = sp_mod(tr, m, tr);
13904
6.43k
        }
13905
6.45k
    }
13906
13907
    /* 7. if Words(m) > 1 then tr = FromMont(tr) */
13908
583
    if ((err == MP_OKAY) && useMont) {
13909
512
        err = _sp_mont_red(tr, m, mp, 0);
13910
        /* Reduction implementation returns number to range: 0..m-1. */
13911
512
    }
13912
583
    if (err == MP_OKAY) {
13913
        /* 8. r = tr */
13914
568
        _sp_copy(tr, r);
13915
568
    }
13916
13917
#if 0
13918
    sp_print(r, "rme");
13919
#endif
13920
13921
583
#ifndef WC_NO_HARDEN
13922
583
    FREE_SP_INT_ARRAY(d, NULL);
13923
#else
13924
    FREE_SP_INT(tr, NULL);
13925
#endif
13926
583
    return err;
13927
583
}
13928
#endif
13929
13930
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
13931
    !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || \
13932
    defined(OPENSSL_ALL)
13933
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13934
 *
13935
 * Error returned when parameters r == e or r == m and base >= modulus.
13936
 *
13937
 * @param  [in]   b       SP integer that is the base.
13938
 * @param  [in]   e       SP integer that is the exponent.
13939
 * @param  [in]   digits  Number of digits in exponent to use. May be greater
13940
 *                        than count of digits in e.
13941
 * @param  [in]   m       SP integer that is the modulus.
13942
 * @param  [out]  r       SP integer to hold result.
13943
 *
13944
 * @return  MP_OKAY on success.
13945
 * @return  MP_VAL when b, e, m or r is NULL, digits is negative, or m <= 0 or
13946
 *          e is negative.
13947
 * @return  MP_MEM when dynamic memory allocation fails.
13948
 */
13949
int sp_exptmod_ex(const sp_int* b, const sp_int* e, int digits, const sp_int* m,
13950
    sp_int* r)
13951
709k
{
13952
709k
    int err = MP_OKAY;
13953
709k
    int done = 0;
    /* NOTE(review): bit counts are taken before the NULL checks below; this
     * relies on sp_count_bits() tolerating a NULL argument - confirm.
     */
13954
709k
    int mBits = sp_count_bits(m);
13955
709k
    int bBits = sp_count_bits(b);
13956
709k
    int eBits = sp_count_bits(e);
13957
13958
709k
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL) ||
13959
709k
             (digits < 0)) {
13960
0
        err = MP_VAL;
13961
0
    }
13962
    /* Ensure m is not too big. */
13963
709k
    else if (m->used * 2 >= SP_INT_DIGITS) {
13964
20
        err = MP_VAL;
13965
20
    }
13966
13967
#if 0
13968
    if (err == MP_OKAY) {
13969
        sp_print(b, "a");
13970
        sp_print(e, "b");
13971
        sp_print(m, "m");
13972
    }
13973
#endif
13974
13975
    /* Check for invalid modulus. */
13976
709k
    if ((err == MP_OKAY) && sp_iszero(m)) {
13977
133
        err = MP_VAL;
13978
133
    }
13979
709k
#ifdef WOLFSSL_SP_INT_NEGATIVE
13980
    /* Check for unsupported negative values of exponent and modulus. */
13981
709k
    if ((err == MP_OKAY) && ((e->sign == MP_NEG) || (m->sign == MP_NEG))) {
13982
97
        err = MP_VAL;
13983
97
    }
13984
709k
#endif
13985
13986
    /* Check for degenerate cases. */
13987
709k
    if ((err == MP_OKAY) && sp_isone(m)) {
13988
81
        _sp_set(r, 0);
13989
81
        done = 1;
13990
81
    }
13991
709k
    if ((!done) && (err == MP_OKAY) && sp_iszero(e)) {
13992
7.17k
        _sp_set(r, 1);
13993
7.17k
        done = 1;
13994
7.17k
    }
13995
13996
    /* Ensure base is less than modulus. */
13997
709k
    if ((!done) && (err == MP_OKAY) && (_sp_cmp_abs(b, m) != MP_LT)) {
13998
        /* Reduced base is written to r, so r must not alias e or m. */
15.9k
        if ((r == e) || (r == m)) {
13999
7
            err = MP_VAL;
14000
7
        }
14001
15.9k
        if (err == MP_OKAY) {
14002
15.9k
            err = sp_mod(b, m, r);
14003
15.9k
        }
14004
15.9k
        if (err == MP_OKAY) {
14005
15.9k
            b = r;
14006
15.9k
        }
14007
15.9k
    }
14008
    /* Check for degenerate case of base. */
14009
709k
    if ((!done) && (err == MP_OKAY) && sp_iszero(b)) {
14010
161
        _sp_set(r, 0);
14011
161
        done = 1;
14012
161
    }
14013
14014
    /* Ensure SP integers have space for intermediate values. */
14015
709k
    if ((!done) && (err == MP_OKAY) && (m->used * 2 >= r->size)) {
14016
45
        err = MP_VAL;
14017
45
    }
14018
14019
709k
    if ((!done) && (err == MP_OKAY)) {
14020
        /* Use code optimized for specific sizes if possible */
14021
#if (defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)) && \
14022
    ((defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
14023
        defined(WOLFSSL_HAVE_SP_DH))
14024
    #ifndef WOLFSSL_SP_NO_2048
14025
        if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) &&
14026
                (eBits <= 1024)) {
14027
            err = sp_ModExp_1024((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
14028
            done = 1;
14029
        }
14030
        else if ((mBits == 2048) && sp_isodd(m) && (bBits <= 2048) &&
14031
                 (eBits <= 2048)) {
14032
            err = sp_ModExp_2048((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
14033
            done = 1;
14034
        }
14035
        else
14036
    #endif
14037
    #ifndef WOLFSSL_SP_NO_3072
14038
        if ((mBits == 1536) && sp_isodd(m) && (bBits <= 1536) &&
14039
                (eBits <= 1536)) {
14040
            err = sp_ModExp_1536((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
14041
            done = 1;
14042
        }
14043
        else if ((mBits == 3072) && sp_isodd(m) && (bBits <= 3072) &&
14044
                 (eBits <= 3072)) {
14045
            err = sp_ModExp_3072((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
14046
            done = 1;
14047
        }
14048
        else
14049
    #endif
14050
    #ifdef WOLFSSL_SP_4096
14051
        if ((mBits == 4096) && sp_isodd(m) && (bBits <= 4096) &&
14052
                (eBits <= 4096)) {
14053
            err = sp_ModExp_4096((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
14054
            done = 1;
14055
        }
14056
        else
14057
    #endif
14058
#endif
14059
701k
        {
14060
            /* SP does not support size. */
14061
701k
        }
14062
701k
    }
14063
709k
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(OPENSSL_ALL)
14064
#if (defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_RSA_PUBLIC_ONLY)) && \
14065
    defined(NO_DH)
14066
    if ((!done) && (err == MP_OKAY)) {
14067
        /* Use non-constant time version - fastest. */
14068
        err = sp_exptmod_nct(b, e, m, r);
14069
    }
14070
#else
14071
709k
#if defined(WOLFSSL_SP_MATH_ALL) || defined(OPENSSL_ALL)
    /* NOTE(review): digits == 0 passes the check above; the base 2 helper
     * reads e->dp[digits - 1] - confirm callers never pass 0 with e != 0.
     */
14072
709k
    if ((!done) && (err == MP_OKAY) && (b->used == 1) && (b->dp[0] == 2) &&
14073
10.3k
         mp_isodd(m)) {
14074
        /* Use the generic base 2 implementation. */
14075
9.62k
        err = _sp_exptmod_base_2(e, digits, m, r);
14076
9.62k
    }
14077
699k
    else if ((!done) && (err == MP_OKAY) && ((m->used > 1) && mp_isodd(m))) {
14078
669k
    #ifndef WC_NO_HARDEN
14079
        /* Use constant time version hardened against timing attacks and
14080
         * cache attacks when WC_NO_CACHE_RESISTANT not defined. */
14081
669k
        err = _sp_exptmod_mont_ex(b, e, digits * SP_WORD_SIZE, m, r);
14082
    #else
14083
        /* Use non-constant time version - fastest. */
14084
        err = sp_exptmod_nct(b, e, m, r);
14085
    #endif
14086
669k
    }
14087
30.7k
    else
14088
30.7k
#endif /* WOLFSSL_SP_MATH_ALL || OPENSSL_ALL */
14089
30.7k
    if ((!done) && (err == MP_OKAY)) {
14090
        /* Otherwise use the generic implementation hardened against
14091
         * timing and cache attacks. */
14092
22.9k
        err = _sp_exptmod_ex(b, e, digits * SP_WORD_SIZE, m, r);
14093
22.9k
    }
14094
709k
#endif /* WOLFSSL_RSA_VERIFY_ONLY || WOLFSSL_RSA_PUBLIC_ONLY */
14095
#else
14096
    /* No exponentiation implementation compiled in for this configuration. */
    if ((!done) && (err == MP_OKAY)) {
14097
        err = MP_VAL;
14098
    }
14099
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || OPENSSL_ALL */
14100
14101
    /* Variables may be unused depending on the build configuration. */
709k
    (void)mBits;
14102
709k
    (void)bBits;
14103
709k
    (void)eBits;
14104
709k
    (void)digits;
14105
14106
#if 0
14107
    if (err == MP_OKAY) {
14108
        sp_print(r, "rme");
14109
    }
14110
#endif
14111
709k
    return err;
14112
709k
}
14113
#endif
14114
14115
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
14116
    !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || \
14117
    defined(OPENSSL_ALL)
14118
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
14119
 *
14120
 * @param  [in]   b  SP integer that is the base.
14121
 * @param  [in]   e  SP integer that is the exponent.
14122
 * @param  [in]   m  SP integer that is the modulus.
14123
 * @param  [out]  r  SP integer to hold result.
14124
 *
14125
 * @return  MP_OKAY on success.
14126
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
14127
 * @return  MP_MEM when dynamic memory allocation fails.
14128
 */
14129
int sp_exptmod(const sp_int* b, const sp_int* e, const sp_int* m, sp_int* r)
14130
839k
{
14131
839k
    int err = MP_OKAY;
14132
14133
    /* Validate parameters. */
14134
839k
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
14135
0
        err = MP_VAL;
14136
0
    }
14137
    /* NOTE(review): the macro argument presumably runs when saving the vector
     * registers fails, setting err - confirm against the macro definition.
     */
839k
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
14138
839k
    if (err == MP_OKAY) {
14139
        /* Exponentiate using every digit in use in the exponent. */
839k
        err = sp_exptmod_ex(b, e, (int)e->used, m, r);
14140
839k
    }
14141
839k
    RESTORE_VECTOR_REGISTERS();
14142
839k
    return err;
14143
839k
}
14144
#endif
14145
14146
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
14147
#if defined(WOLFSSL_SP_FAST_NCT_EXPTMOD) || !defined(WOLFSSL_SP_SMALL)
14148
14149
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
14150
 * Creates a window of precalculated exponents with base in Montgomery form.
14151
 * Sliding window and is NOT constant time.
14152
 *
14153
 * n-bit window is: (b^(2^(n-1))*b^0)...(b^(2^(n-1))*b^(2^(n-1)-1))
14154
 * e.g. when n=6, b^32..b^63
14155
 * Algorithm:
14156
 *   1. Ensure base is less than modulus.
14157
 *   2. Convert base to Montgomery form
14158
 *   3. Set result to table entry for top window bits, or
14159
 *      if less than windows bits in exponent, 1 in Montgomery form.
14160
 *   4. While at least window bits left:
14161
 *     4.1. Count number of and skip leading 0 bits unless less than window bits
14162
 *          left.
14163
 *     4.2. Montgomery square result for each leading 0 and window bits if bits
14164
 *          left.
14165
 *     4.3. Break if less than window bits left.
14166
 *     4.4. Get top window bits from exponent and drop.
14167
 *     4.5. Montgomery multiply result by table entry.
14168
 *   5. While bits left:
14169
 *     5.1. Montgomery square result
14170
 *     5.2. If exponent bit set
14171
 *       5.2.1. Montgomery multiply result by Montgomery form of base.
14172
 *   6. Convert result back from Montgomery form.
14173
 *
14174
 * @param  [in]   b     SP integer that is the base.
14175
 * @param  [in]   e     SP integer that is the exponent.
14176
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
14177
 *                      count of bits in e.
14178
 * @param  [in]   m     SP integer that is the modulus.
14179
 * @param  [out]  r     SP integer to hold result.
14180
 *
14181
 * @return  MP_OKAY on success.
14182
 * @return  MP_MEM when dynamic memory allocation fails.
14183
 */
14184
static int _sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m,
14185
    sp_int* r)
14186
17.2k
{
14187
17.2k
    int i = 0;
14188
17.2k
    int bits;
14189
17.2k
    int winBits;
14190
17.2k
    int preCnt;
14191
17.2k
    int err = MP_OKAY;
14192
17.2k
    int done = 0;
14193
17.2k
    sp_int* tr = NULL;
14194
17.2k
    sp_int* bm = NULL;
14195
    /* Maximum winBits is 6 and preCnt is (1 << (winBits - 1)).
     * Two more entries are declared after the table: tr (temporary result)
     * and bm (base in Montgomery form).
     */
14196
17.2k
#ifndef WOLFSSL_SP_NO_MALLOC
14197
17.2k
    DECL_DYN_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);
14198
#else
14199
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);
14200
#endif
14201
14202
17.2k
    bits = sp_count_bits(e);
14203
14204
    /* Window bits based on number of pre-calculations versus number of loop
14205
     * calculations.
14206
     * Exponents for RSA and DH will result in 6-bit windows.
14207
     * Note: for 4096-bit values, 7-bit window is slightly better.
14208
     */
14209
17.2k
    if (bits > 450) {
14210
120
        winBits = 6;
14211
120
    }
14212
17.1k
    else if (bits <= 21) {
14213
16.4k
        winBits = 2;
14214
16.4k
    }
14215
685
    else if (bits <= 36) {
14216
171
        winBits = 3;
14217
171
    }
14218
514
    else if (bits <= 140) {
14219
370
        winBits = 4;
14220
370
    }
14221
144
    else {
14222
144
        winBits = 5;
14223
144
    }
14224
    /* Top bit of exponent fixed as 1 for pre-calculated window. */
14225
17.2k
    preCnt = 1 << (winBits - 1);
14226
14227
    /* Allocate sp_ints for:
14228
     *  - pre-computation table
14229
     *  - temporary result
14230
     *  - Montgomery form of base
14231
     */
14232
17.2k
#ifndef WOLFSSL_SP_NO_MALLOC
14233
17.2k
    ALLOC_DYN_SP_INT_ARRAY(t, m->used * 2U + 1U, (size_t)preCnt + 2, err, NULL);
14234
#else
14235
    ALLOC_SP_INT_ARRAY(t, m->used * 2U + 1U, (size_t)preCnt + 2, err, NULL);
14236
#endif
14237
17.2k
    if (err == MP_OKAY) {
14238
        /* Set variables to use allocated memory. */
14239
17.2k
        tr = t[preCnt + 0];
14240
17.2k
        bm = t[preCnt + 1];
14241
14242
        /* Initialize all allocated SP integers. */
14243
59.5k
        for (i = 0; i < preCnt; i++) {
14244
42.3k
            _sp_init_size(t[i], (sp_size_t)(m->used * 2 + 1));
14245
42.3k
        }
14246
17.2k
        _sp_init_size(tr, (sp_size_t)(m->used * 2 + 1));
14247
17.2k
        _sp_init_size(bm, (sp_size_t)(m->used * 2 + 1));
14248
14249
        /* 1. Ensure base is less than modulus. */
14250
17.2k
        if (_sp_cmp_abs(b, m) != MP_LT) {
14251
2.77k
            err = sp_mod(b, m, bm);
14252
            /* Handle base == modulus. */
14253
2.77k
            if ((err == MP_OKAY) && sp_iszero(bm)) {
14254
17
                _sp_set(r, 0);
14255
17
                done = 1;
14256
17
            }
14257
2.77k
        }
14258
14.4k
        else {
14259
            /* Copy base into Montgomery base variable. */
14260
14.4k
            _sp_copy(b, bm);
14261
14.4k
        }
14262
17.2k
    }
14263
14264
17.2k
    if ((!done) && (err == MP_OKAY)) {
14265
        /* y: index into the pre-computed table.
         * c: number of bits of the current exponent digit still held in n.
         */
17.1k
        int y = 0;
14266
17.1k
        int c = 0;
14267
17.1k
        sp_int_digit mp;
14268
14269
        /* Calculate Montgomery multiplier for reduction. */
14270
17.1k
        _sp_mont_setup(m, &mp);
14271
        /* Calculate Montgomery normalizer for modulus. */
14272
17.1k
        err = sp_mont_norm(t[0], m);
14273
17.1k
        if (err == MP_OKAY) {
14274
            /* 2. Convert base to Montgomery form. */
14275
17.1k
            err = sp_mul(bm, t[0], bm);
14276
17.1k
        }
14277
17.1k
        if (err == MP_OKAY) {
14278
            /* bm = bm mod m, temporary size has to be bigger than bm->used. */
14279
17.1k
            err = _sp_div(bm, m, NULL, bm, bm->used + 1U);
14280
17.1k
        }
14281
17.1k
        if (err == MP_OKAY) {
14282
            /* Copy Montgomery form of base into first element of table. */
14283
17.1k
            _sp_copy(bm, t[0]);
14284
17.1k
        }
14285
        /* Calculate b^(2^(winBits-1)) by squaring winBits-1 times. */
14286
35.9k
        for (i = 1; (i < winBits) && (err == MP_OKAY); i++) {
14287
18.8k
            err = sp_sqr(t[0], t[0]);
14288
18.8k
            if (err == MP_OKAY) {
14289
18.7k
                err = _sp_mont_red(t[0], m, mp, 0);
14290
18.7k
            }
14291
18.8k
        }
14292
        /* For each table entry after first. */
14293
41.5k
        for (i = 1; (i < preCnt) && (err == MP_OKAY); i++) {
14294
            /* Multiply previous entry by the base in Mont form into table. */
14295
24.3k
            err = sp_mul(t[i-1], bm, t[i]);
14296
24.3k
            if (err == MP_OKAY) {
14297
24.3k
                err = _sp_mont_red(t[i], m, mp, 0);
14298
24.3k
            }
14299
24.3k
        }
14300
14301
        /* 3. Set result to table entry for top window bits, or
14302
         *    if less than windows bits in exponent, 1 in Montgomery form.
14303
         */
14304
17.1k
        if (err == MP_OKAY) {
14305
            /* Exponent digit being consumed, next unread bit at the top. */
17.1k
            sp_int_digit n;
14306
            /* Mask for calculating index into pre-computed table.
             * It keeps winBits-1 bits: the top bit of a window is not part
             * of the index.
             */
14307
17.1k
            sp_int_digit mask = (sp_int_digit)preCnt - 1;
14308
14309
            /* Find the top bit. */
14310
17.1k
            i = (bits - 1) >> SP_WORD_SHIFT;
14311
17.1k
            n = e->dp[i--];
14312
17.1k
            c = bits % SP_WORD_SIZE;
14313
17.1k
            if (c == 0) {
14314
191
                c = SP_WORD_SIZE;
14315
191
            }
14316
            /* Put top bit at highest offset in digit. */
14317
17.1k
            n <<= SP_WORD_SIZE - c;
14318
14319
17.1k
            if (bits >= winBits) {
14320
                /* Top bit set. Copy from window. */
14321
17.1k
                if (c < winBits) {
14322
                    /* Bits to end of digit and part of next */
14323
66
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
14324
66
                    n = e->dp[i--];
14325
66
                    c = winBits - c;
14326
66
                    y |= (int)(n >> (SP_WORD_SIZE - c));
14327
66
                    n <<= c;
14328
66
                    c = SP_WORD_SIZE - c;
14329
66
                }
14330
17.0k
                else {
14331
                    /* Bits from middle of digit */
14332
17.0k
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
14333
17.0k
                    n <<= winBits;
14334
17.0k
                    c -= winBits;
14335
17.0k
                }
14336
17.1k
                _sp_copy(t[y], tr);
14337
17.1k
            }
14338
16
            else {
14339
                /* 1 in Montgomery form. */
14340
16
                err = sp_mont_norm(tr, m);
14341
16
            }
14342
14343
            /* 4. While at least window bits left. */
14344
63.7k
            while ((err == MP_OKAY) && ((i >= 0) || (c >= winBits))) {
14345
                /* Number of squares to perform before a table entry is used,
                 * due to top bits being 0.
                 */
14346
63.1k
                int sqrs = 0;
14347
14348
                /* 4.1. Count number of and skip leading 0 bits unless less
14349
                 *      than window bits.
14350
                 */
14351
332k
                do {
14352
                    /* Make sure n has bits from the right digit. */
14353
332k
                    if (c == 0) {
14354
2.19k
                        n = e->dp[i--];
14355
2.19k
                        c = SP_WORD_SIZE;
14356
2.19k
                    }
14357
                    /* Mask off the next bit. */
14358
332k
                    if ((n & ((sp_int_digit)1 << (SP_WORD_SIZE - 1))) != 0) {
14359
46.6k
                        break;
14360
46.6k
                    }
14361
14362
                    /* Another square needed. */
14363
285k
                    sqrs++;
14364
                    /* Skip bit. */
14365
285k
                    n <<= 1;
14366
285k
                    c--;
14367
285k
                }
14368
285k
                while ((err == MP_OKAY) && ((i >= 0) || (c >= winBits)));
14369
14370
63.1k
                if ((err == MP_OKAY) && ((i >= 0) || (c >= winBits))) {
14371
                    /* Add squares needed before using table entry. */
14372
46.6k
                    sqrs += winBits;
14373
46.6k
                }
14374
14375
                /* 4.2. Montgomery square result for each leading 0 and window
14376
                 *      bits if bits left.
14377
                 */
14378
611k
                for (; (err == MP_OKAY) && (sqrs > 0); sqrs--) {
14379
547k
                    err = sp_sqr(tr, tr);
14380
547k
                    if (err == MP_OKAY) {
14381
547k
                        err = _sp_mont_red(tr, m, mp, 0);
14382
547k
                    }
14383
547k
                }
14384
14385
                /* 4.3. Break if less than window bits left. */
14386
63.1k
                if ((err == MP_OKAY) && (i < 0) && (c < winBits)) {
14387
16.4k
                    break;
14388
16.4k
                }
14389
14390
                /* 4.4. Get top window bits from exponent and drop. */
14391
46.6k
                if (err == MP_OKAY) {
14392
46.6k
                    if (c == 0) {
14393
                        /* Bits from next digit.
                         * NOTE(review): looks unreachable - getting past 4.3
                         * means the do-while in 4.1 was left via break, and
                         * c is reloaded to SP_WORD_SIZE there whenever it is
                         * 0. Confirm before removing.
                         */
14394
0
                        n = e->dp[i--];
14395
0
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
14396
0
                        n <<= winBits;
14397
0
                        c = SP_WORD_SIZE - winBits;
14398
0
                    }
14399
46.6k
                    else if (c < winBits) {
14400
                        /* Bits to end of digit and part of next. */
14401
3.81k
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
14402
3.81k
                        n = e->dp[i--];
14403
3.81k
                        c = winBits - c;
14404
3.81k
                        y |= (int)(n >> (SP_WORD_SIZE - c));
14405
3.81k
                        n <<= c;
14406
3.81k
                        c = SP_WORD_SIZE - c;
14407
3.81k
                    }
14408
42.8k
                    else {
14409
                        /* Bits from middle of digit. */
14410
42.8k
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
14411
42.8k
                        n <<= winBits;
14412
42.8k
                        c -= winBits;
14413
42.8k
                    }
14414
                    /* Drop the top bit of the window to get the table index. */
46.6k
                    y &= (int)mask;
14415
46.6k
                }
14416
14417
                /* 4.5. Montgomery multiply result by table entry. */
14418
46.6k
                if (err == MP_OKAY) {
14419
46.6k
                    err = sp_mul(tr, t[y], tr);
14420
46.6k
                }
14421
46.6k
                if (err == MP_OKAY) {
14422
46.6k
                    err = _sp_mont_red(tr, m, mp, 0);
14423
46.6k
                }
14424
46.6k
            }
14425
14426
            /* Finished multiplying in table entries. */
14427
17.1k
            if ((err == MP_OKAY) && (c > 0)) {
14428
                /* Handle remaining bits.
14429
                 * Window values have top bit set and can't be used. */
14430
16.8k
                n = e->dp[0];
14431
                /*  5. While bits left: */
14432
34.5k
                for (--c; (err == MP_OKAY) && (c >= 0); c--) {
14433
                    /* 5.1. Montgomery square result */
14434
17.6k
                    err = sp_sqr(tr, tr);
14435
17.6k
                    if (err == MP_OKAY) {
14436
17.6k
                        err = _sp_mont_red(tr, m, mp, 0);
14437
17.6k
                    }
14438
                    /* 5.2. If exponent bit set */
14439
17.6k
                    if ((err == MP_OKAY) && ((n >> c) & 1)) {
14440
                        /* 5.2.1. Montgomery multiply result by Montgomery form
14441
                         * of base.
14442
                         */
14443
17.0k
                        err = sp_mul(tr, bm, tr);
14444
17.0k
                        if (err == MP_OKAY) {
14445
17.0k
                            err = _sp_mont_red(tr, m, mp, 0);
14446
17.0k
                        }
14447
17.0k
                    }
14448
17.6k
                }
14449
16.8k
            }
14450
17.1k
        }
14451
14452
17.1k
        if (err == MP_OKAY) {
14453
            /* 6. Convert result back from Montgomery form. */
14454
17.0k
            err = _sp_mont_red(tr, m, mp, 0);
14455
            /* Reduction implementation returns number to range: 0..m-1. */
14456
17.0k
        }
14457
17.1k
    }
14458
17.2k
    if ((!done) && (err == MP_OKAY)) {
14459
        /* Copy temporary result into parameter. */
14460
17.0k
        _sp_copy(tr, r);
14461
17.0k
    }
14462
14463
17.2k
#ifndef WOLFSSL_SP_NO_MALLOC
14464
17.2k
    FREE_DYN_SP_INT_ARRAY(t, NULL);
14465
#else
14466
    FREE_SP_INT_ARRAY(t, NULL);
14467
#endif
14468
17.2k
    return err;
14469
17.2k
}
14470
14471
#else
14472
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
14473
 * Non-constant time implementation.
14474
 *
14475
 * Algorithm:
14476
 *   1. Convert base to Montgomery form
14477
 *   2. Set result to base (assumes exponent is not zero)
14478
 *   3. For each bit in exponent starting at second highest
14479
 *     3.1. Montogmery square result
14480
 *     3.2. If exponent bit set
14481
 *       3.2.1. Montgomery multiply result by Montgomery form of base.
14482
 *   4. Convert result back from Montgomery form.
14483
 *
14484
 * @param  [in]   b  SP integer that is the base.
14485
 * @param  [in]   e  SP integer that is the exponent.
14486
 * @param  [in]   m  SP integer that is the modulus.
14487
 * @param  [out]  r  SP integer to hold result.
14488
 *
14489
 * @return  MP_OKAY on success.
14490
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
14491
 * @return  MP_MEM when dynamic memory allocation fails.
14492
 */
14493
static int _sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m,
14494
    sp_int* r)
14495
{
14496
    int i;
14497
    int err = MP_OKAY;
14498
    int done = 0;
14499
    int y = 0;
14500
    int bits = sp_count_bits(e);
14501
    sp_int_digit mp;
14502
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 2);
14503
14504
    /* Allocate memory for:
14505
     *  - Montgomery form of base
14506
     *  - Temporary result (in case r is same var as another parameter). */
14507
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 2, err, NULL);
14508
    if (err == MP_OKAY) {
14509
        _sp_init_size(t[0], m->used * 2 + 1);
14510
        _sp_init_size(t[1], m->used * 2 + 1);
14511
14512
        /* Ensure base is less than modulus and copy into temp. */
14513
        if (_sp_cmp_abs(b, m) != MP_LT) {
14514
            err = sp_mod(b, m, t[0]);
14515
            /* Handle base == modulus. */
14516
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
14517
                _sp_set(r, 0);
14518
                done = 1;
14519
            }
14520
        }
14521
        else {
14522
            /* Copy base into temp. */
14523
            _sp_copy(b, t[0]);
14524
        }
14525
    }
14526
14527
    if ((!done) && (err == MP_OKAY)) {
14528
        /* Calculate Montgomery multiplier for reduction. */
14529
        _sp_mont_setup(m, &mp);
14530
        /* Calculate Montgomery normalizer for modulus. */
14531
        err = sp_mont_norm(t[1], m);
14532
        if (err == MP_OKAY) {
14533
            /* 1. Convert base to Montgomery form. */
14534
            err = sp_mul(t[0], t[1], t[0]);
14535
        }
14536
        if (err == MP_OKAY) {
14537
            /* t[0] = t[0] mod m, temporary size has to be bigger than t[0]. */
14538
            err = _sp_div(t[0], m, NULL, t[0], t[0]->used + 1);
14539
        }
14540
        if (err == MP_OKAY) {
14541
            /* 2. Result starts as Montgomery form of base (assuming e > 0). */
14542
            _sp_copy(t[0], t[1]);
14543
        }
14544
14545
        /* 3. For each bit in exponent starting at second highest. */
14546
        for (i = bits - 2; (err == MP_OKAY) && (i >= 0); i--) {
14547
            /* 3.1. Montgomery square result. */
14548
            err = sp_sqr(t[0], t[0]);
14549
            if (err == MP_OKAY) {
14550
                err = _sp_mont_red(t[0], m, mp, 0);
14551
            }
14552
            if (err == MP_OKAY) {
14553
                /* Get bit and index i. */
14554
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
14555
                /* 3.2. If exponent bit set */
14556
                if (y != 0) {
14557
                    /* 3.2.1. Montgomery multiply result by Mont of base. */
14558
                    err = sp_mul(t[0], t[1], t[0]);
14559
                    if (err == MP_OKAY) {
14560
                        err = _sp_mont_red(t[0], m, mp, 0);
14561
                    }
14562
                }
14563
            }
14564
        }
14565
        if (err == MP_OKAY) {
14566
            /* 4. Convert from Montgomery form. */
14567
            err = _sp_mont_red(t[0], m, mp, 0);
14568
            /* Reduction implementation returns number of range 0..m-1. */
14569
        }
14570
    }
14571
    if ((!done) && (err == MP_OKAY)) {
14572
        /* Copy temporary result into parameter. */
14573
        _sp_copy(t[0], r);
14574
    }
14575
14576
    FREE_SP_INT_ARRAY(t, NULL);
14577
    return err;
14578
}
14579
#endif /* WOLFSSL_SP_FAST_NCT_EXPTMOD || !WOLFSSL_SP_SMALL */
14580
14581
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
14582
 * Non-constant time implementation.
14583
 *
14584
 * @param  [in]   b  SP integer that is the base.
14585
 * @param  [in]   e  SP integer that is the exponent.
14586
 * @param  [in]   m  SP integer that is the modulus.
14587
 * @param  [out]  r  SP integer to hold result.
14588
 *
14589
 * @return  MP_OKAY on success.
14590
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
14591
 * @return  MP_MEM when dynamic memory allocation fails.
14592
 */
14593
int sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m, sp_int* r)
14594
17.2k
{
14595
17.2k
    int err = MP_OKAY;
14596
14597
    /* Validate parameters. */
14598
17.2k
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
14599
0
        err = MP_VAL;
14600
0
    }
14601
14602
#if 0
14603
    if (err == MP_OKAY) {
14604
        sp_print(b, "a");
14605
        sp_print(e, "b");
14606
        sp_print(m, "m");
14607
    }
14608
#endif
14609
14610
17.2k
    if (err != MP_OKAY) {
14611
0
    }
14612
    /* Handle special cases. */
14613
17.2k
    else if (sp_iszero(m)) {
14614
14
        err = MP_VAL;
14615
14
    }
14616
17.2k
#ifdef WOLFSSL_SP_INT_NEGATIVE
14617
17.2k
    else if ((e->sign == MP_NEG) || (m->sign == MP_NEG)) {
14618
24
        err = MP_VAL;
14619
24
    }
14620
17.1k
#endif
14621
    /* x mod 1 is always 0. */
14622
17.1k
    else if (sp_isone(m)) {
14623
5
        _sp_set(r, 0);
14624
5
    }
14625
    /* b^0 mod m = 1 mod m = 1. */
14626
17.1k
    else if (sp_iszero(e)) {
14627
110
        _sp_set(r, 1);
14628
110
    }
14629
    /* 0^x mod m = 0 mod m = 0. */
14630
17.0k
    else if (sp_iszero(b)) {
14631
12
        _sp_set(r, 0);
14632
12
    }
14633
    /* Ensure SP integers have space for intermediate values. */
14634
17.0k
    else if (m->used * 2 >= r->size) {
14635
3
        err = MP_VAL;
14636
3
    }
14637
17.0k
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
14638
17.0k
    else if (mp_iseven(m)) {
14639
49
        err = _sp_exptmod_ex(b, e, (int)(e->used * SP_WORD_SIZE), m, r);
14640
49
    }
14641
16.9k
#endif
14642
16.9k
    else {
14643
16.9k
        err = _sp_exptmod_nct(b, e, m, r);
14644
16.9k
    }
14645
14646
#if 0
14647
    if (err == MP_OKAY) {
14648
        sp_print(r, "rme");
14649
    }
14650
#endif
14651
14652
17.2k
    return err;
14653
17.2k
}
14654
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
14655
14656
/***************
14657
 * 2^e functions
14658
 ***************/
14659
14660
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
14661
/* Divide by 2^e: r = a >> e and rem = bits shifted out
14662
 *
14663
 * @param  [in]   a    SP integer to divide.
14664
 * @param  [in]   e    Exponent bits (dividing by 2^e).
14665
 * @param  [in]   m    SP integer that is the modulus.
14666
 * @param  [out]  r    SP integer to hold result.
14667
 * @param  [out]  rem  SP integer to hold remainder.
14668
 *
14669
 * @return  MP_OKAY on success.
14670
 * @return  MP_VAL when a is NULL or e is negative.
14671
 */
14672
int sp_div_2d(const sp_int* a, int e, sp_int* r, sp_int* rem)
14673
299
{
14674
299
    int err = MP_OKAY;
14675
14676
299
    if ((a == NULL) || (e < 0)) {
14677
0
        err = MP_VAL;
14678
0
    }
14679
14680
299
    if (err == MP_OKAY) {
14681
        /* Number of bits remaining after shift. */
14682
299
        int remBits = sp_count_bits(a) - e;
14683
14684
299
        if (remBits <= 0) {
14685
            /* Shifting down by more bits than in number. */
14686
123
            _sp_zero(r);
14687
123
            if (rem != NULL) {
14688
75
                err = sp_copy(a, rem);
14689
75
            }
14690
123
        }
14691
176
        else {
14692
176
            if (rem != NULL) {
14693
                /* Copy a in to remainder. */
14694
109
                err = sp_copy(a, rem);
14695
109
            }
14696
176
            if (err == MP_OKAY) {
14697
                /* Shift a down by into result. */
14698
164
                err = sp_rshb(a, e, r);
14699
164
            }
14700
176
            if ((err == MP_OKAY) && (rem != NULL)) {
14701
                /* Set used and mask off top digit of remainder. */
14702
92
                rem->used = (sp_size_t)((e + SP_WORD_SIZE - 1) >>
14703
92
                                        SP_WORD_SHIFT);
14704
92
                e &= SP_WORD_MASK;
14705
92
                if (e > 0) {
14706
39
                    rem->dp[rem->used - 1] &= ((sp_int_digit)1 << e) - 1;
14707
39
                }
14708
14709
                /* Remove leading zeros from remainder. */
14710
92
                sp_clamp(rem);
14711
92
            #ifdef WOLFSSL_SP_INT_NEGATIVE
14712
92
                rem->sign = MP_ZPOS;
14713
92
            #endif
14714
92
            }
14715
176
        }
14716
299
    }
14717
14718
299
    return err;
14719
299
}
14720
#endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
14721
14722
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
14723
    defined(HAVE_ECC)
14724
/* The bottom e bits: r = a & ((1 << e) - 1)
14725
 *
14726
 * @param  [in]   a  SP integer to reduce.
14727
 * @param  [in]   e  Modulus bits (modulus equals 2^e).
14728
 * @param  [out]  r  SP integer to hold result.
14729
 *
14730
 * @return  MP_OKAY on success.
14731
 * @return  MP_VAL when a or r is NULL, e is negative or e is too large for
14732
 *          result.
14733
 */
14734
int sp_mod_2d(const sp_int* a, int e, sp_int* r)
14735
77
{
14736
77
    int err = MP_OKAY;
14737
77
    sp_size_t digits = (sp_size_t)((e + SP_WORD_SIZE - 1) >> SP_WORD_SHIFT);
14738
14739
77
    if ((a == NULL) || (r == NULL) || (e < 0)) {
14740
0
        err = MP_VAL;
14741
0
    }
14742
77
    if ((err == MP_OKAY) && (digits > r->size)) {
14743
0
        err = MP_VAL;
14744
0
    }
14745
14746
77
    if (err == MP_OKAY) {
14747
        /* Copy a into r if not same pointer. */
14748
77
        if (a != r) {
14749
4
            XMEMCPY(r->dp, a->dp, digits * (word32)SP_WORD_SIZEOF);
14750
4
            r->used = a->used;
14751
4
        #ifdef WOLFSSL_SP_INT_NEGATIVE
14752
4
            r->sign = a->sign;
14753
4
        #endif
14754
4
        }
14755
14756
        /* Modify result if a is bigger or same digit size. */
14757
    #ifndef WOLFSSL_SP_INT_NEGATIVE
14758
        if (digits <= a->used)
14759
    #else
14760
        /* Need to make negative positive and mask. */
14761
77
        if ((a->sign == MP_NEG) || (digits <= a->used))
14762
62
    #endif
14763
62
        {
14764
62
        #ifdef WOLFSSL_SP_INT_NEGATIVE
14765
62
            if (a->sign == MP_NEG) {
14766
24
                unsigned int i;
14767
24
                sp_int_digit carry = 0;
14768
14769
                /* Negate value. */
14770
547
                for (i = 0; i < r->used; i++) {
14771
523
                    sp_int_digit next = r->dp[i] > 0;
14772
523
                    r->dp[i] = (sp_int_digit)0 - r->dp[i] - carry;
14773
523
                    carry |= next;
14774
523
                }
14775
24
                for (; i < digits; i++) {
14776
0
                    r->dp[i] = (sp_int_digit)0 - carry;
14777
0
                }
14778
24
                r->sign = MP_ZPOS;
14779
24
            }
14780
62
        #endif
14781
            /* Set used and mask off top digit of result. */
14782
62
            r->used = digits;
14783
62
            e &= SP_WORD_MASK;
14784
62
            if (e > 0) {
14785
48
                r->dp[r->used - 1] &= ((sp_int_digit)1 << e) - 1;
14786
48
            }
14787
62
            sp_clamp(r);
14788
62
        }
14789
77
    }
14790
14791
77
    return err;
14792
77
}
14793
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY)) || HAVE_ECC */
14794
14795
#if (defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
14796
    !defined(NO_DH))) || defined(OPENSSL_ALL)
14797
/* Multiply by 2^e: r = a << e
14798
 *
14799
 * @param  [in]   a  SP integer to multiply.
14800
 * @param  [in]   e  Multiplier bits (multiplier equals 2^e).
14801
 * @param  [out]  r  SP integer to hold result.
14802
 *
14803
 * @return  MP_OKAY on success.
14804
 * @return  MP_VAL when a or r is NULL, e is negative, or result is too big for
14805
 *          result size.
14806
 */
14807
int sp_mul_2d(const sp_int* a, int e, sp_int* r)
14808
295k
{
14809
295k
    int err = MP_OKAY;
14810
14811
    /* Validate parameters. */
14812
295k
    if ((a == NULL) || (r == NULL) || (e < 0)) {
14813
0
        err = MP_VAL;
14814
0
    }
14815
14816
    /* Ensure result has enough allocated digits for result. */
14817
295k
    if ((err == MP_OKAY) &&
14818
295k
            ((unsigned int)(sp_count_bits(a) + e) >
14819
295k
             (unsigned int)r->size * SP_WORD_SIZE)) {
14820
31
        err = MP_VAL;
14821
31
    }
14822
14823
295k
    if (err == MP_OKAY) {
14824
        /* Copy a into r as left shift function works on the number. */
14825
295k
        if (a != r) {
14826
10.9k
            err = sp_copy(a, r);
14827
10.9k
        }
14828
295k
    }
14829
14830
295k
    if (err == MP_OKAY) {
14831
#if 0
14832
        sp_print(a, "a");
14833
        sp_print_int(e, "n");
14834
#endif
14835
295k
        err = sp_lshb(r, e);
14836
#if 0
14837
        sp_print(r, "rsl");
14838
#endif
14839
295k
    }
14840
14841
295k
    return err;
14842
295k
}
14843
#endif /* (WOLFSSL_SP_MATH_ALL && (!WOLFSSL_RSA_VERIFY_ONLY || !NO_DH)) ||
        * OPENSSL_ALL */
14844
14845
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
14846
    defined(HAVE_ECC) || (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
14847
14848
/* START SP_SQR implementations */
14849
/* This code is generated.
14850
 * To generate:
14851
 *   cd scripts/sp/sp_int
14852
 *   ./gen.sh
14853
 * File sp_sqr.c contains code.
14854
 */
14855
14856
#if !defined(WOLFSSL_SP_MATH) || !defined(WOLFSSL_SP_SMALL)
14857
#ifdef SQR_MUL_ASM
14858
/* Square a and store in r. r = a * a
14859
 *
14860
 * @param  [in]   a  SP integer to square.
14861
 * @param  [out]  r  SP integer result.
14862
 *
14863
 * @return  MP_OKAY on success.
14864
 * @return  MP_MEM when dynamic memory allocation fails.
14865
 */
14866
static int _sp_sqr(const sp_int* a, sp_int* r)
14867
{
14868
    int err = MP_OKAY;
14869
    /* i and j index the two digits of a being multiplied; k is the digit
     * of a at the centre of the two result columns being calculated. */
    sp_size_t i;
14870
    int j;
14871
    sp_size_t k;
14872
    /* t buffers the low digits of the result; they are copied into r at the
     * end. In the heap and dynamic stack builds it holds
     * ((a->used + 1) / 2) * 2 + 1 digits. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14873
    sp_int_digit* t = NULL;
14874
#elif defined(WOLFSSL_SP_DYN_STACK)
14875
    sp_int_digit t[((a->used + 1) / 2) * 2 + 1];
14876
#else
14877
    sp_int_digit t[(SP_INT_DIGITS + 1) / 2];
14878
#endif
14879
14880
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14881
    t = (sp_int_digit*)XMALLOC(
14882
        sizeof(sp_int_digit) * (size_t)(((a->used + 1) / 2) * 2 + 1), NULL,
14883
        DYNAMIC_TYPE_BIGINT);
14884
    if (t == NULL) {
14885
        err = MP_MEM;
14886
    }
14887
#endif
14888
    /* At most one digit: a single square written straight to r. */
    if ((err == MP_OKAY) && (a->used <= 1)) {
14889
        sp_int_digit l;
14890
        sp_int_digit h;
14891
14892
        h = 0;
14893
        l = 0;
14894
        /* NOTE(review): h is stored to dp[0] and l to dp[1], so SP_ASM_SQR
         * presumably puts the low digit in its first operand - confirm
         * against the macro definition. */
        SP_ASM_SQR(h, l, a->dp[0]);
14895
        r->dp[0] = h;
14896
        r->dp[1] = l;
14897
    }
14898
    else if (err == MP_OKAY) {
14899
        sp_int_digit l;
14900
        sp_int_digit h;
14901
        sp_int_digit o;
14902
        /* Where result digits are stored: t to begin with, r->dp later. */
        sp_int_digit* p = t;
14903
14904
        h = 0;
14905
        l = 0;
14906
        SP_ASM_SQR(h, l, a->dp[0]);
14907
        t[0] = h;
14908
        h = 0;
14909
        o = 0;
14910
        /* First part: inner loops run until j reaches the bottom digit. Two
         * result digits, t[k * 2 - 1] and t[k * 2], are stored per pass. */
        for (k = 1; k < (sp_size_t)((a->used + 1) / 2); k++) {
14911
            i = k;
14912
            j = (int)(k - 1);
14913
            for (; (j >= 0); i++, j--) {
14914
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
14915
            }
14916
            t[k * 2 - 1] = l;
14917
            l = h;
14918
            h = o;
14919
            o = 0;
14920
14921
            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
14922
            i = (sp_size_t)(k + 1);
14923
            j = (int)(k - 1);
14924
            for (; (j >= 0); i++, j--) {
14925
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
14926
            }
14927
            t[k * 2] = l;
14928
            l = h;
14929
            h = o;
14930
            o = 0;
14931
        }
14932
        /* Second part: inner loops run until i reaches the top digit. */
        for (; k < a->used; k++) {
14933
            i = k;
14934
            j = (int)(k - 1);
14935
            for (; (i < a->used); i++, j--) {
14936
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
14937
            }
14938
            p[k * 2 - 1] = l;
14939
            l = h;
14940
            h = o;
14941
            o = 0;
14942
14943
            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
14944
            i = (sp_size_t)(k + 1);
14945
            j = (int)(k - 1);
14946
            for (; (i < a->used); i++, j--) {
14947
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
14948
            }
14949
            p[k * 2] = l;
14950
            l = h;
14951
            h = o;
14952
            o = 0;
14953
14954
            /* After the first pass of this loop store directly into r. */
            p = r->dp;
14955
        }
14956
        /* Top digit of result. */
        r->dp[k * 2 - 1] = l;
14957
        /* Copy the buffered low digits into the result. */
        XMEMCPY(r->dp, t, (size_t)(((a->used + 1) / 2) * 2 + 1) *
14958
            sizeof(sp_int_digit));
14959
    }
14960
14961
    if (err == MP_OKAY) {
14962
        r->used = (sp_size_t)(a->used * 2U);
14963
        sp_clamp(r);
14964
    }
14965
14966
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14967
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
14968
#endif
14969
    return err;
14970
}
14971
#else /* !SQR_MUL_ASM */
14972
/* Square a and store in r. r = a * a
14973
 *
14974
 * @param  [in]   a  SP integer to square.
14975
 * @param  [out]  r  SP integer result.
14976
 *
14977
 * @return  MP_OKAY on success.
14978
 * @return  MP_MEM when dynamic memory allocation fails.
14979
 */
14980
static int _sp_sqr(const sp_int* a, sp_int* r)
14981
33.9M
{
14982
33.9M
    int err = MP_OKAY;
14983
    /* i and j index the two digits of a being multiplied (i + j == k);
     * k is the index of the result digit being calculated. */
33.9M
    sp_size_t i;
14984
33.9M
    int j;
14985
33.9M
    sp_size_t k;
14986
    /* t holds the whole result until it is complete; it is copied into r
     * at the end, so digits of a are not overwritten while still needed. */
33.9M
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14987
33.9M
    sp_int_digit* t = NULL;
14988
#elif defined(WOLFSSL_SP_DYN_STACK)
14989
    sp_int_digit t[a->used * 2];
14990
#else
14991
    sp_int_digit t[SP_INT_DIGITS];
14992
#endif
14993
14994
33.9M
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14995
33.9M
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) *
14996
33.9M
                               (size_t)(a->used * 2), NULL,
14997
33.9M
                               DYNAMIC_TYPE_BIGINT);
14998
33.9M
    if (t == NULL) {
14999
666
        err = MP_MEM;
15000
666
    }
15001
33.9M
#endif
15002
33.9M
    if (err == MP_OKAY) {
15003
        /* w: product of two digits.
         * l: running sum for the current result digit.
         * h: running sum of the carries into the next result digit.
         */
33.9M
    #ifndef WOLFSSL_SP_INT_SQR_VOLATILE
15004
33.9M
        sp_int_word w;
15005
33.9M
        sp_int_word l;
15006
33.9M
        sp_int_word h;
15007
    #else
15008
        volatile sp_int_word w;
15009
        volatile sp_int_word l;
15010
        volatile sp_int_word h;
15011
    #endif
15012
        /* o: carries out of h, used only when SP_WORD_OVERFLOW is defined. */
33.9M
    #ifdef SP_WORD_OVERFLOW
15013
33.9M
        sp_int_word o;
15014
33.9M
    #endif
15015
15016
        /* Result digit 0 is the low half of a[0] * a[0]. */
33.9M
        w = (sp_int_word)a->dp[0] * a->dp[0];
15017
33.9M
        t[0] = (sp_int_digit)w;
15018
33.9M
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
15019
33.9M
        h = 0;
15020
33.9M
    #ifdef SP_WORD_OVERFLOW
15021
33.9M
        o = 0;
15022
33.9M
    #endif
15023
        /* For each following result digit add up every a[i] * a[j] where
         * i + j == k. */
1.24G
        for (k = 1; k <= (sp_size_t)((a->used - 1) * 2); k++) {
15024
1.21G
            i = k / 2;
15025
1.21G
            j = (int)(k - i);
15026
            /* k even: the square of the middle digit is added once. */
1.21G
            if (i == (unsigned int)j) {
15027
607M
                w = (sp_int_word)a->dp[i] * a->dp[j];
15028
607M
                l += (sp_int_digit)w;
15029
607M
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
15030
607M
            #ifdef SP_WORD_OVERFLOW
15031
607M
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
15032
607M
                l &= SP_MASK;
15033
607M
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
15034
607M
                h &= SP_MASK;
15035
607M
            #endif
15036
607M
            }
15037
            /* Remaining pairs have i != j: each product is added twice. */
9.92G
            for (++i, --j; (i < a->used) && (j >= 0); i++, j--) {
15038
8.71G
                w = (sp_int_word)a->dp[i] * a->dp[j];
15039
8.71G
                l += (sp_int_digit)w;
15040
8.71G
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
15041
8.71G
            #ifdef SP_WORD_OVERFLOW
15042
8.71G
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
15043
8.71G
                l &= SP_MASK;
15044
8.71G
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
15045
8.71G
                h &= SP_MASK;
15046
8.71G
            #endif
15047
8.71G
                l += (sp_int_digit)w;
15048
8.71G
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
15049
8.71G
            #ifdef SP_WORD_OVERFLOW
15050
8.71G
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
15051
8.71G
                l &= SP_MASK;
15052
8.71G
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
15053
8.71G
                h &= SP_MASK;
15054
8.71G
            #endif
15055
8.71G
            }
15056
            /* Store the digit and move the accumulators down one digit. */
1.21G
            t[k] = (sp_int_digit)l;
15057
1.21G
            l >>= SP_WORD_SIZE;
15058
1.21G
            l += (sp_int_digit)h;
15059
1.21G
            h >>= SP_WORD_SIZE;
15060
1.21G
        #ifdef SP_WORD_OVERFLOW
15061
1.21G
            h += o & SP_MASK;
15062
1.21G
            o >>= SP_WORD_SIZE;
15063
1.21G
        #endif
15064
1.21G
        }
15065
        /* Top digit of result. */
33.9M
        t[k] = (sp_int_digit)l;
15066
33.9M
        r->used = (sp_size_t)(k + 1);
15067
        /* Copy the completed result out of the temporary. */
33.9M
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
15068
33.9M
        sp_clamp(r);
15069
33.9M
    }
15070
15071
33.9M
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15072
33.9M
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
15073
33.9M
#endif
15074
33.9M
    return err;
15075
33.9M
}
15076
#endif /* SQR_MUL_ASM */
15077
#endif /* !WOLFSSL_SP_MATH || !WOLFSSL_SP_SMALL */
15078
15079
#ifndef WOLFSSL_SP_SMALL
15080
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
15081
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
15082
#ifndef SQR_MUL_ASM
15083
/* Square a and store in r. r = a * a
15084
 *
15085
 * Long-hand implementation for a 4-digit operand: reads a->dp[0..3] and
 * writes r->dp[0..7]. This variant is compiled when SQR_MUL_ASM is not
 * defined (and, per the enclosing guard, SP_WORD_SIZE == 64).
15086
 *
 * The ten distinct products a[i] * a[j] (i <= j) are computed first, then the
 * result digits are produced column by column with w[0] reused as the running
 * column sum.
 *
 * NOTE(review): nothing here checks a->used or the size of r; the caller is
 * presumably responsible for both - confirm at the call site.
 *
15087
 * @param  [in]   a  SP integer to square.
15088
 * @param  [out]  r  SP integer result.
15089
 *
15090
 * @return  MP_OKAY on success.
15091
 * @return  MP_MEM when dynamic memory allocation fails (only possible when
 *          WOLFSSL_SMALL_STACK is defined and WOLFSSL_SP_NO_MALLOC is not).
15092
 */
15093
static int _sp_sqr_4(const sp_int* a, sp_int* r)
15094
{
15095
    int err = MP_OKAY;
15096
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15097
    sp_int_word* w = NULL;
15098
#else
15099
    sp_int_word w[10];
15100
#endif
15101
    const sp_int_digit* da = a->dp;
15102
15103
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15104
    /* Small-stack build: the ten product words live on the heap. */
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 10, NULL,
15105
        DYNAMIC_TYPE_BIGINT);
15106
    if (w == NULL) {
15107
        err = MP_MEM;
15108
    }
15109
#endif
15110
15111
15112
    if (err == MP_OKAY) {
15113
        /* Form the ten distinct products da[i] * da[j], 0 <= i <= j <= 3.
         * Every read of da happens here, before r->dp is first written. */
        w[0] = (sp_int_word)da[0] * da[0];
15114
        w[1] = (sp_int_word)da[0] * da[1];
15115
        w[2] = (sp_int_word)da[0] * da[2];
15116
        w[3] = (sp_int_word)da[1] * da[1];
15117
        w[4] = (sp_int_word)da[0] * da[3];
15118
        w[5] = (sp_int_word)da[1] * da[2];
15119
        w[6] = (sp_int_word)da[1] * da[3];
15120
        w[7] = (sp_int_word)da[2] * da[2];
15121
        w[8] = (sp_int_word)da[2] * da[3];
15122
        w[9] = (sp_int_word)da[3] * da[3];
15123
15124
        /* From here w[0] is the running column sum. For each result digit:
         * add the low digit of every product belonging to that column
         * (products with i != j are added twice, squares once), store the low
         * digit of the sum, then shift the sum and the products just used
         * right by SP_WORD_SIZE so their high digits feed the next column. */
        /* Digit 0: low digit of a0*a0. */
        r->dp[0] = (sp_int_digit)w[0];
15125
        w[0] >>= SP_WORD_SIZE;
15126
        /* Digit 1: 2 * low(a0*a1). */
        w[0] += (sp_int_digit)w[1];
15127
        w[0] += (sp_int_digit)w[1];
15128
        r->dp[1] = (sp_int_digit)w[0];
15129
        w[0] >>= SP_WORD_SIZE;
15130
        /* Digit 2: 2 * high(a0*a1) + 2 * low(a0*a2) + low(a1*a1). */
        w[1] >>= SP_WORD_SIZE;
15131
        w[0] += (sp_int_digit)w[1];
15132
        w[0] += (sp_int_digit)w[1];
15133
        w[0] += (sp_int_digit)w[2];
15134
        w[0] += (sp_int_digit)w[2];
15135
        w[0] += (sp_int_digit)w[3];
15136
        r->dp[2] = (sp_int_digit)w[0];
15137
        w[0] >>= SP_WORD_SIZE;
15138
        /* Digit 3: 2 * high(a0*a2) + high(a1*a1) + 2 * low(a0*a3)
         *          + 2 * low(a1*a2). */
        w[2] >>= SP_WORD_SIZE;
15139
        w[0] += (sp_int_digit)w[2];
15140
        w[0] += (sp_int_digit)w[2];
15141
        w[3] >>= SP_WORD_SIZE;
15142
        w[0] += (sp_int_digit)w[3];
15143
        w[0] += (sp_int_digit)w[4];
15144
        w[0] += (sp_int_digit)w[4];
15145
        w[0] += (sp_int_digit)w[5];
15146
        w[0] += (sp_int_digit)w[5];
15147
        r->dp[3] = (sp_int_digit)w[0];
15148
        w[0] >>= SP_WORD_SIZE;
15149
        /* Digit 4: 2 * high(a0*a3) + 2 * high(a1*a2) + 2 * low(a1*a3)
         *          + low(a2*a2). */
        w[4] >>= SP_WORD_SIZE;
15150
        w[0] += (sp_int_digit)w[4];
15151
        w[0] += (sp_int_digit)w[4];
15152
        w[5] >>= SP_WORD_SIZE;
15153
        w[0] += (sp_int_digit)w[5];
15154
        w[0] += (sp_int_digit)w[5];
15155
        w[0] += (sp_int_digit)w[6];
15156
        w[0] += (sp_int_digit)w[6];
15157
        w[0] += (sp_int_digit)w[7];
15158
        r->dp[4] = (sp_int_digit)w[0];
15159
        w[0] >>= SP_WORD_SIZE;
15160
        /* Digit 5: 2 * high(a1*a3) + high(a2*a2) + 2 * low(a2*a3). */
        w[6] >>= SP_WORD_SIZE;
15161
        w[0] += (sp_int_digit)w[6];
15162
        w[0] += (sp_int_digit)w[6];
15163
        w[7] >>= SP_WORD_SIZE;
15164
        w[0] += (sp_int_digit)w[7];
15165
        w[0] += (sp_int_digit)w[8];
15166
        w[0] += (sp_int_digit)w[8];
15167
        r->dp[5] = (sp_int_digit)w[0];
15168
        w[0] >>= SP_WORD_SIZE;
15169
        /* Digit 6: 2 * high(a2*a3) + low(a3*a3). */
        w[8] >>= SP_WORD_SIZE;
15170
        w[0] += (sp_int_digit)w[8];
15171
        w[0] += (sp_int_digit)w[8];
15172
        w[0] += (sp_int_digit)w[9];
15173
        r->dp[6] = (sp_int_digit)w[0];
15174
        w[0] >>= SP_WORD_SIZE;
15175
        /* Digit 7: remaining carry + high(a3*a3). */
        w[9] >>= SP_WORD_SIZE;
15176
        w[0] += (sp_int_digit)w[9];
15177
        r->dp[7] = (sp_int_digit)w[0];
15178
15179
        /* Eight digits were written; sp_clamp() (defined elsewhere) is then
         * applied to r - presumably to drop leading zero digits. */
        r->used = 8;
15180
        sp_clamp(r);
15181
    }
15182
15183
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15184
    /* Reached on the failure path too, where w is still NULL. */
    XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
15185
#endif
15186
    return err;
15187
}
15188
#else /* SQR_MUL_ASM */
15189
/* Square a and store in r. r = a * a
15190
 *
15191
 * Comba implementation for a 4-digit operand, built on the SP_ASM_* macros
 * (defined elsewhere in the file): reads a->dp[0..3], writes r->dp[0..7].
 *
 * (l, h, o) is a three-digit column accumulator. After each column its lowest
 * digit is stored and the accumulator is shifted down one digit
 * (l = h; h = o; o = 0). The lower four result digits are kept in the local
 * array t and copied into r only at the end, so r->dp[0..3] is not modified
 * while a->dp is still being read.
15192
 *
15193
 * @param  [in]   a  SP integer to square.
15194
 * @param  [out]  r  SP integer result.
15195
 *
15196
 * @return  MP_OKAY always.
15197
 *          (This variant performs no dynamic allocation, so MP_MEM is never
 *          returned.)
15198
 */
15199
static int _sp_sqr_4(const sp_int* a, sp_int* r)
15200
{
15201
    sp_int_digit l = 0;
15202
    sp_int_digit h = 0;
15203
    sp_int_digit o = 0;
15204
    sp_int_digit t[4];
15205
15206
    /* Column 0. Note the (h, l) argument order: the digit stored to t[0] is
     * taken from h, and l is carried into column 1. */
    SP_ASM_SQR(h, l, a->dp[0]);
15207
    t[0] = h;
15208
    h = 0;
15209
    /* Column 1: a0*a1. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15210
    t[1] = l;
15211
    /* Shift the accumulator down one digit for the next column. */
    l = h;
15212
    h = o;
15213
    o = 0;
15214
    /* Column 2: a0*a2 and a1*a1. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15215
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15216
    t[2] = l;
15217
    l = h;
15218
    h = o;
15219
    o = 0;
15220
    /* Column 3: a0*a3 and a1*a2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15221
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15222
    t[3] = l;
15223
    l = h;
15224
    h = o;
15225
    o = 0;
15226
    /* Column 4: a1*a3 and a2*a2. From here digits go straight into r. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15227
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15228
    r->dp[4] = l;
15229
    l = h;
15230
    h = o;
15231
    o = 0;
15232
    /* Column 5: a2*a3. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[3]);
15233
    r->dp[5] = l;
15234
    l = h;
15235
    h = o;
15236
    /* Column 6: a3*a3; the two remaining digits are the top of the result. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[3]);
15237
    r->dp[6] = l;
15238
    r->dp[7] = h;
15239
    /* Lower half is copied last, after every read of a->dp above. */
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
15240
    r->used = 8;
15241
    sp_clamp(r);
15242
15243
    return MP_OKAY;
15244
}
15245
#endif /* SQR_MUL_ASM */
15246
#endif /* SP_WORD_SIZE == 64 */
15247
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
15248
#ifdef SQR_MUL_ASM
15249
/* Square a and store in r. r = a * a
15250
 *
15251
 * Comba implementation for a 6-digit operand, built on the SP_ASM_* macros
 * (defined elsewhere in the file): reads a->dp[0..5], writes r->dp[0..11].
 *
 * (l, h, o) is the column accumulator; after each column its lowest digit is
 * stored and it is shifted down one digit (l = h; h = o; o = 0). For columns
 * with three cross products, the products are first summed in (tl, th, to)
 * and then folded into (l, h, o) with SP_ASM_ADD_DBL_3.
 * NOTE(review): the macro name suggests the sum is added twice - confirm
 * against the macro definition.
 *
 * The lower six result digits are kept in the local array t and copied into r
 * only at the end, so r->dp[0..5] is not modified while a->dp is being read.
15252
 *
15253
 * @param  [in]   a  SP integer to square.
15254
 * @param  [out]  r  SP integer result.
15255
 *
15256
 * @return  MP_OKAY always.
15257
 *          (This variant performs no dynamic allocation, so MP_MEM is never
 *          returned.)
15258
 */
15259
static int _sp_sqr_6(const sp_int* a, sp_int* r)
15260
{
15261
    sp_int_digit l = 0;
15262
    sp_int_digit h = 0;
15263
    sp_int_digit o = 0;
15264
    sp_int_digit tl = 0;
15265
    sp_int_digit th = 0;
15266
    sp_int_digit to;
15267
    sp_int_digit t[6];
15268
15269
    /* The enclosing guard requires SP_WORD_SIZE == 64, so this condition
     * cannot hold here; 'to' is otherwise first used as an argument of
     * SP_ASM_MUL_SET below, which presumably assigns it. */
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
15270
    to = 0;
15271
#endif
15272
15273
    /* Column 0. Note the (h, l) argument order: the digit stored to t[0] is
     * taken from h, and l is carried into column 1. */
    SP_ASM_SQR(h, l, a->dp[0]);
15274
    t[0] = h;
15275
    h = 0;
15276
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15277
    t[1] = l;
15278
    /* Shift the accumulator down one digit for the next column. */
    l = h;
15279
    h = o;
15280
    o = 0;
15281
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15282
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15283
    t[2] = l;
15284
    l = h;
15285
    h = o;
15286
    o = 0;
15287
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15288
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15289
    t[3] = l;
15290
    l = h;
15291
    h = o;
15292
    o = 0;
15293
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
15294
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15295
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15296
    t[4] = l;
15297
    l = h;
15298
    h = o;
15299
    o = 0;
15300
    /* Column 5 (three cross products): summed in (tl, th, to) first. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
15301
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
15302
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
15303
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15304
    t[5] = l;
15305
    l = h;
15306
    h = o;
15307
    o = 0;
15308
    /* Upper half: from column 6 on, digits go straight into r. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[5]);
15309
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[4]);
15310
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
15311
    r->dp[6] = l;
15312
    l = h;
15313
    h = o;
15314
    o = 0;
15315
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[5]);
15316
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[4]);
15317
    r->dp[7] = l;
15318
    l = h;
15319
    h = o;
15320
    o = 0;
15321
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[5]);
15322
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
15323
    r->dp[8] = l;
15324
    l = h;
15325
    h = o;
15326
    o = 0;
15327
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[5]);
15328
    r->dp[9] = l;
15329
    l = h;
15330
    h = o;
15331
    /* Last column: a5*a5; the two remaining digits are the top of r. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[5]);
15332
    r->dp[10] = l;
15333
    r->dp[11] = h;
15334
    /* Lower half is copied last, after every read of a->dp above. */
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
15335
    r->used = 12;
15336
    sp_clamp(r);
15337
15338
    return MP_OKAY;
15339
}
15340
#endif /* SQR_MUL_ASM */
15341
#endif /* SP_WORD_SIZE == 64 */
15342
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
15343
#ifdef SQR_MUL_ASM
15344
/* Square a and store in r. r = a * a
15345
 *
15346
 * Comba implementation for an 8-digit operand, built on the SP_ASM_* macros
 * (defined elsewhere in the file): reads a->dp[0..7], writes r->dp[0..15].
 *
 * (l, h, o) is the column accumulator; after each column its lowest digit is
 * stored and it is shifted down one digit (l = h; h = o; o = 0). For columns
 * with three or more cross products, the products are first summed in
 * (tl, th, to) and then folded into (l, h, o) with SP_ASM_ADD_DBL_3.
 * NOTE(review): the macro name suggests the sum is added twice - confirm
 * against the macro definition.
 *
 * The lower eight result digits are kept in the local array t and copied into
 * r only at the end, so r->dp[0..7] is not modified while a->dp is being read.
15347
 *
15348
 * @param  [in]   a  SP integer to square.
15349
 * @param  [out]  r  SP integer result.
15350
 *
15351
 * @return  MP_OKAY always.
15352
 *          (This variant performs no dynamic allocation, so MP_MEM is never
 *          returned.)
15353
 */
15354
static int _sp_sqr_8(const sp_int* a, sp_int* r)
15355
{
15356
    sp_int_digit l = 0;
15357
    sp_int_digit h = 0;
15358
    sp_int_digit o = 0;
15359
    sp_int_digit tl = 0;
15360
    sp_int_digit th = 0;
15361
    sp_int_digit to;
15362
    sp_int_digit t[8];
15363
15364
    /* 'to' is only zeroed explicitly for this configuration; otherwise its
     * first use is as an argument of SP_ASM_MUL_SET below, which presumably
     * assigns it. */
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
15365
    to = 0;
15366
#endif
15367
15368
    /* Column 0. Note the (h, l) argument order: the digit stored to t[0] is
     * taken from h, and l is carried into column 1. */
    SP_ASM_SQR(h, l, a->dp[0]);
15369
    t[0] = h;
15370
    h = 0;
15371
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15372
    t[1] = l;
15373
    /* Shift the accumulator down one digit for the next column. */
    l = h;
15374
    h = o;
15375
    o = 0;
15376
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15377
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15378
    t[2] = l;
15379
    l = h;
15380
    h = o;
15381
    o = 0;
15382
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15383
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15384
    t[3] = l;
15385
    l = h;
15386
    h = o;
15387
    o = 0;
15388
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
15389
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15390
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15391
    t[4] = l;
15392
    l = h;
15393
    h = o;
15394
    o = 0;
15395
    /* From column 5 on, cross products are summed in (tl, th, to) first. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
15396
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
15397
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
15398
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15399
    t[5] = l;
15400
    l = h;
15401
    h = o;
15402
    o = 0;
15403
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
15404
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
15405
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
15406
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
15407
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15408
    t[6] = l;
15409
    l = h;
15410
    h = o;
15411
    o = 0;
15412
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
15413
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
15414
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
15415
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
15416
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15417
    t[7] = l;
15418
    l = h;
15419
    h = o;
15420
    o = 0;
15421
    /* Upper half: from column 8 on, digits go straight into r. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[7]);
15422
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
15423
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
15424
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
15425
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15426
    r->dp[8] = l;
15427
    l = h;
15428
    h = o;
15429
    o = 0;
15430
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[7]);
15431
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
15432
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
15433
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15434
    r->dp[9] = l;
15435
    l = h;
15436
    h = o;
15437
    o = 0;
15438
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[7]);
15439
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[6]);
15440
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
15441
    r->dp[10] = l;
15442
    l = h;
15443
    h = o;
15444
    o = 0;
15445
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[7]);
15446
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[6]);
15447
    r->dp[11] = l;
15448
    l = h;
15449
    h = o;
15450
    o = 0;
15451
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[7]);
15452
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
15453
    r->dp[12] = l;
15454
    l = h;
15455
    h = o;
15456
    o = 0;
15457
    SP_ASM_MUL_ADD2(l, h, o, a->dp[6], a->dp[7]);
15458
    r->dp[13] = l;
15459
    l = h;
15460
    h = o;
15461
    /* Last column: a7*a7; the two remaining digits are the top of r. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[7]);
15462
    r->dp[14] = l;
15463
    r->dp[15] = h;
15464
    /* Lower half is copied last, after every read of a->dp above. */
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
15465
    r->used = 16;
15466
    sp_clamp(r);
15467
15468
    return MP_OKAY;
15469
}
15470
#endif /* SQR_MUL_ASM */
15471
#endif /* SP_WORD_SIZE == 32 */
15472
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
15473
#ifdef SQR_MUL_ASM
15474
/* Square a and store in r. r = a * a
15475
 *
15476
 * Comba implementation for a 12-digit operand, built on the SP_ASM_* macros
 * (defined elsewhere in the file): reads a->dp[0..11], writes r->dp[0..23].
 *
 * (l, h, o) is the column accumulator; after each column its lowest digit is
 * stored and it is shifted down one digit (l = h; h = o; o = 0). For columns
 * with three or more cross products, the products are first summed in
 * (tl, th, to) and then folded into (l, h, o) with SP_ASM_ADD_DBL_3.
 * NOTE(review): the macro name suggests the sum is added twice - confirm
 * against the macro definition.
 *
 * The lower twelve result digits are kept in the local array t and copied
 * into r only at the end, so r->dp[0..11] is not modified while a->dp is
 * being read.
15477
 *
15478
 * @param  [in]   a  SP integer to square.
15479
 * @param  [out]  r  SP integer result.
15480
 *
15481
 * @return  MP_OKAY always.
15482
 *          (This variant performs no dynamic allocation, so MP_MEM is never
 *          returned.)
15483
 */
15484
static int _sp_sqr_12(const sp_int* a, sp_int* r)
15485
{
15486
    sp_int_digit l = 0;
15487
    sp_int_digit h = 0;
15488
    sp_int_digit o = 0;
15489
    sp_int_digit tl = 0;
15490
    sp_int_digit th = 0;
15491
    sp_int_digit to;
15492
    sp_int_digit t[12];
15493
15494
    /* 'to' is only zeroed explicitly for this configuration; otherwise its
     * first use is as an argument of SP_ASM_MUL_SET below, which presumably
     * assigns it. */
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
15495
    to = 0;
15496
#endif
15497
15498
    /* Column 0. Note the (h, l) argument order: the digit stored to t[0] is
     * taken from h, and l is carried into column 1. */
    SP_ASM_SQR(h, l, a->dp[0]);
15499
    t[0] = h;
15500
    h = 0;
15501
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15502
    t[1] = l;
15503
    /* Shift the accumulator down one digit for the next column. */
    l = h;
15504
    h = o;
15505
    o = 0;
15506
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15507
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15508
    t[2] = l;
15509
    l = h;
15510
    h = o;
15511
    o = 0;
15512
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15513
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15514
    t[3] = l;
15515
    l = h;
15516
    h = o;
15517
    o = 0;
15518
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
15519
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15520
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15521
    t[4] = l;
15522
    l = h;
15523
    h = o;
15524
    o = 0;
15525
    /* From column 5 on, cross products are summed in (tl, th, to) first. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
15526
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
15527
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
15528
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15529
    t[5] = l;
15530
    l = h;
15531
    h = o;
15532
    o = 0;
15533
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
15534
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
15535
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
15536
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
15537
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15538
    t[6] = l;
15539
    l = h;
15540
    h = o;
15541
    o = 0;
15542
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
15543
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
15544
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
15545
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
15546
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15547
    t[7] = l;
15548
    l = h;
15549
    h = o;
15550
    o = 0;
15551
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
15552
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
15553
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
15554
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
15555
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
15556
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15557
    t[8] = l;
15558
    l = h;
15559
    h = o;
15560
    o = 0;
15561
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
15562
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
15563
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
15564
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
15565
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
15566
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15567
    t[9] = l;
15568
    l = h;
15569
    h = o;
15570
    o = 0;
15571
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
15572
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
15573
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
15574
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
15575
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
15576
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
15577
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15578
    t[10] = l;
15579
    l = h;
15580
    h = o;
15581
    o = 0;
15582
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
15583
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
15584
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
15585
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
15586
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
15587
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
15588
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15589
    t[11] = l;
15590
    l = h;
15591
    h = o;
15592
    o = 0;
15593
    /* Upper half: from column 12 on, digits go straight into r. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[11]);
15594
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
15595
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
15596
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
15597
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
15598
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
15599
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15600
    r->dp[12] = l;
15601
    l = h;
15602
    h = o;
15603
    o = 0;
15604
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[11]);
15605
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
15606
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
15607
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
15608
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
15609
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15610
    r->dp[13] = l;
15611
    l = h;
15612
    h = o;
15613
    o = 0;
15614
    SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[11]);
15615
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
15616
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
15617
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
15618
    SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
15619
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15620
    r->dp[14] = l;
15621
    l = h;
15622
    h = o;
15623
    o = 0;
15624
    SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[11]);
15625
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
15626
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
15627
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
15628
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15629
    r->dp[15] = l;
15630
    l = h;
15631
    h = o;
15632
    o = 0;
15633
    SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[11]);
15634
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
15635
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
15636
    SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
15637
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15638
    r->dp[16] = l;
15639
    l = h;
15640
    h = o;
15641
    o = 0;
15642
    SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[11]);
15643
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
15644
    SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
15645
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15646
    r->dp[17] = l;
15647
    l = h;
15648
    h = o;
15649
    o = 0;
15650
    SP_ASM_MUL_ADD2(l, h, o, a->dp[7], a->dp[11]);
15651
    SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[10]);
15652
    SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
15653
    r->dp[18] = l;
15654
    l = h;
15655
    h = o;
15656
    o = 0;
15657
    SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[11]);
15658
    SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[10]);
15659
    r->dp[19] = l;
15660
    l = h;
15661
    h = o;
15662
    o = 0;
15663
    SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[11]);
15664
    SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
15665
    r->dp[20] = l;
15666
    l = h;
15667
    h = o;
15668
    o = 0;
15669
    SP_ASM_MUL_ADD2(l, h, o, a->dp[10], a->dp[11]);
15670
    r->dp[21] = l;
15671
    l = h;
15672
    h = o;
15673
    /* Last column: a11*a11; the two remaining digits are the top of r. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[11]);
15674
    r->dp[22] = l;
15675
    r->dp[23] = h;
15676
    /* Lower half is copied last, after every read of a->dp above. */
    XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
15677
    r->used = 24;
15678
    sp_clamp(r);
15679
15680
    return MP_OKAY;
15681
}
15682
#endif /* SQR_MUL_ASM */
15683
#endif /* SP_WORD_SIZE == 32 */
15684
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
15685
15686
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
15687
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
15688
    (SP_WORD_SIZE == 64)))
15689
    #if SP_INT_DIGITS >= 32
15690
/* Square a and store in r. r = a * a
15691
 *
15692
 * Comba implementation for a 16-digit operand, built on the SP_ASM_* macros
 * (defined elsewhere in the file): reads a->dp[0..15], writes r->dp[0..31].
 *
 * (l, h, o) is the column accumulator; after each column its lowest digit is
 * stored and it is shifted down one digit (l = h; h = o; o = 0). For columns
 * with three or more cross products, the products are first summed in
 * (tl, th, to) and then folded into (l, h, o) with SP_ASM_ADD_DBL_3.
 * NOTE(review): the macro name suggests the sum is added twice - confirm
 * against the macro definition.
 *
 * The lower sixteen result digits are kept in the temporary t and copied into
 * r only at the end, so r->dp[0..15] is not modified while a->dp is being
 * read. t is heap-allocated when WOLFSSL_SMALL_STACK is defined and
 * WOLFSSL_SP_NO_MALLOC is not; otherwise it is a local array.
15693
 *
15694
 * @param  [in]   a  SP integer to square.
15695
 * @param  [out]  r  SP integer result.
15696
 *
15697
 * @return  MP_OKAY on success.
15698
 * @return  MP_MEM when dynamic memory allocation fails (heap-allocated t
 *          only).
15699
 */
15700
static int _sp_sqr_16(const sp_int* a, sp_int* r)
15701
{
15702
    int err = MP_OKAY;
15703
    sp_int_digit l = 0;
15704
    sp_int_digit h = 0;
15705
    sp_int_digit o = 0;
15706
    sp_int_digit tl = 0;
15707
    sp_int_digit th = 0;
15708
    sp_int_digit to;
15709
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15710
    sp_int_digit* t = NULL;
15711
#else
15712
    sp_int_digit t[16];
15713
#endif
15714
15715
    /* 'to' is only zeroed explicitly for this configuration; otherwise its
     * first use is as an argument of SP_ASM_MUL_SET below, which presumably
     * assigns it. */
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
15716
    to = 0;
15717
#endif
15718
15719
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15720
     /* Small-stack build: the lower-half buffer lives on the heap. */
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
15721
         DYNAMIC_TYPE_BIGINT);
15722
     if (t == NULL) {
15723
         err = MP_MEM;
15724
     }
15725
#endif
15726
    if (err == MP_OKAY) {
15727
        /* Column 0. Note the (h, l) argument order: the digit stored to t[0]
         * is taken from h, and l is carried into column 1. */
        SP_ASM_SQR(h, l, a->dp[0]);
15728
        t[0] = h;
15729
        h = 0;
15730
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15731
        t[1] = l;
15732
        /* Shift the accumulator down one digit for the next column. */
        l = h;
15733
        h = o;
15734
        o = 0;
15735
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15736
        SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15737
        t[2] = l;
15738
        l = h;
15739
        h = o;
15740
        o = 0;
15741
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15742
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15743
        t[3] = l;
15744
        l = h;
15745
        h = o;
15746
        o = 0;
15747
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
15748
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15749
        SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15750
        t[4] = l;
15751
        l = h;
15752
        h = o;
15753
        o = 0;
15754
        /* From column 5 on, cross products are summed in (tl, th, to)
         * first. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
15755
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
15756
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
15757
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15758
        t[5] = l;
15759
        l = h;
15760
        h = o;
15761
        o = 0;
15762
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
15763
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
15764
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
15765
        SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
15766
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15767
        t[6] = l;
15768
        l = h;
15769
        h = o;
15770
        o = 0;
15771
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
15772
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
15773
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
15774
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
15775
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15776
        t[7] = l;
15777
        l = h;
15778
        h = o;
15779
        o = 0;
15780
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
15781
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
15782
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
15783
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
15784
        SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
15785
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15786
        t[8] = l;
15787
        l = h;
15788
        h = o;
15789
        o = 0;
15790
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
15791
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
15792
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
15793
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
15794
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
15795
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15796
        t[9] = l;
15797
        l = h;
15798
        h = o;
15799
        o = 0;
15800
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
15801
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
15802
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
15803
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
15804
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
15805
        SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
15806
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15807
        t[10] = l;
15808
        l = h;
15809
        h = o;
15810
        o = 0;
15811
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
15812
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
15813
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
15814
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
15815
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
15816
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
15817
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15818
        t[11] = l;
15819
        l = h;
15820
        h = o;
15821
        o = 0;
15822
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
15823
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
15824
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
15825
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
15826
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
15827
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
15828
        SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
15829
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15830
        t[12] = l;
15831
        l = h;
15832
        h = o;
15833
        o = 0;
15834
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
15835
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
15836
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
15837
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
15838
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
15839
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
15840
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
15841
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15842
        t[13] = l;
15843
        l = h;
15844
        h = o;
15845
        o = 0;
15846
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
15847
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
15848
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
15849
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
15850
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
15851
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
15852
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
15853
        SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
15854
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15855
        t[14] = l;
15856
        l = h;
15857
        h = o;
15858
        o = 0;
15859
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
15860
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
15861
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
15862
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
15863
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
15864
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
15865
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
15866
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
15867
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15868
        t[15] = l;
15869
        l = h;
15870
        h = o;
15871
        o = 0;
15872
        /* Upper half: from column 16 on, digits go straight into r. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[15]);
15873
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
15874
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
15875
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
15876
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
15877
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
15878
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
15879
        SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
15880
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15881
        r->dp[16] = l;
15882
        l = h;
15883
        h = o;
15884
        o = 0;
15885
        SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[15]);
15886
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
15887
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
15888
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
15889
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
15890
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
15891
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
15892
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15893
        r->dp[17] = l;
15894
        l = h;
15895
        h = o;
15896
        o = 0;
15897
        SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[15]);
15898
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
15899
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
15900
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
15901
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
15902
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
15903
        SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
15904
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15905
        r->dp[18] = l;
15906
        l = h;
15907
        h = o;
15908
        o = 0;
15909
        SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[15]);
15910
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
15911
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
15912
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
15913
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
15914
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
15915
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15916
        r->dp[19] = l;
15917
        l = h;
15918
        h = o;
15919
        o = 0;
15920
        SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[15]);
15921
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
15922
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
15923
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
15924
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
15925
        SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
15926
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15927
        r->dp[20] = l;
15928
        l = h;
15929
        h = o;
15930
        o = 0;
15931
        SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[15]);
15932
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
15933
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
15934
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
15935
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
15936
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15937
        r->dp[21] = l;
15938
        l = h;
15939
        h = o;
15940
        o = 0;
15941
        SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[15]);
15942
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
15943
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
15944
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
15945
        SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
15946
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15947
        r->dp[22] = l;
15948
        l = h;
15949
        h = o;
15950
        o = 0;
15951
        SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[15]);
15952
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
15953
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
15954
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
15955
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15956
        r->dp[23] = l;
15957
        l = h;
15958
        h = o;
15959
        o = 0;
15960
        SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[15]);
15961
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
15962
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
15963
        SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
15964
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15965
        r->dp[24] = l;
15966
        l = h;
15967
        h = o;
15968
        o = 0;
15969
        SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[15]);
15970
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
15971
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
15972
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15973
        r->dp[25] = l;
15974
        l = h;
15975
        h = o;
15976
        o = 0;
15977
        SP_ASM_MUL_ADD2(l, h, o, a->dp[11], a->dp[15]);
15978
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[14]);
15979
        SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
15980
        r->dp[26] = l;
15981
        l = h;
15982
        h = o;
15983
        o = 0;
15984
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[15]);
15985
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[14]);
15986
        r->dp[27] = l;
15987
        l = h;
15988
        h = o;
15989
        o = 0;
15990
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[15]);
15991
        SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
15992
        r->dp[28] = l;
15993
        l = h;
15994
        h = o;
15995
        o = 0;
15996
        SP_ASM_MUL_ADD2(l, h, o, a->dp[14], a->dp[15]);
15997
        r->dp[29] = l;
15998
        l = h;
15999
        h = o;
16000
        /* Last column: a15*a15; the two remaining digits are the top of
         * r. */
        SP_ASM_SQR_ADD_NO(l, h, a->dp[15]);
16001
        r->dp[30] = l;
16002
        r->dp[31] = h;
16003
        /* Lower half is copied last, after every read of a->dp above. */
        XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
16004
        r->used = 32;
16005
        sp_clamp(r);
16006
    }
16007
16008
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
16009
    /* Reached on the failure path too, where t is still NULL. */
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
16010
#endif
16011
    return err;
16012
}
16013
    #endif /* SP_INT_DIGITS >= 32 */
16014
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
16015
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
16016
16017
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
16018
    #if SP_INT_DIGITS >= 48
16019
/* Square a and store in r. r = a * a
16020
 *
16021
 * Comba implementation.
16022
 *
 * Fully unrolled for a 24-digit operand: reads a->dp[0..23] and writes 48
 * digits of r before clamping. Output digit k is the sum of the products
 * a->dp[i] * a->dp[k - i]; each pair with i < k - i is listed once and is
 * counted twice (by their names, SP_ASM_MUL_ADD2* and SP_ASM_ADD_DBL_3 do the
 * doubling - the macro definitions are outside this function).
 *
 * Digits 0..23 of the result are staged in the temporary t and copied into
 * r->dp at the end; digits 24..47 are written directly to r->dp.
 * NOTE(review): presumably the staging allows r to be the same object as a -
 * confirm against the callers.
 *
 * When WOLFSSL_SMALL_STACK is defined and WOLFSSL_SP_NO_MALLOC is not, t is
 * allocated with XMALLOC and released with XFREE; otherwise it is a local
 * array.
 *
16023
 * @param  [in]   a  SP integer to square.
16024
 * @param  [out]  r  SP integer result.
16025
 *
16026
 * @return  MP_OKAY on success.
16027
 * @return  MP_MEM when dynamic memory allocation fails.
16028
 */
16029
static int _sp_sqr_24(const sp_int* a, sp_int* r)
16030
{
16031
    int err = MP_OKAY;
16032
    /* l, h, o: three-digit running accumulator. After each output digit is
     * taken from l, the accumulator is shifted down (l = h; h = o; o = 0). */
    sp_int_digit l = 0;
16033
    sp_int_digit h = 0;
16034
    sp_int_digit o = 0;
16035
    /* tl, th, to: per-column sum of the cross products, folded into l/h/o
     * with SP_ASM_ADD_DBL_3. */
    sp_int_digit tl = 0;
16036
    sp_int_digit th = 0;
16037
    sp_int_digit to;
16038
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
16039
    sp_int_digit* t = NULL;
16040
#else
16041
    sp_int_digit t[24];
16042
#endif
16043
16044
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
16045
    to = 0;
16046
#endif
16047
16048
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
16049
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
16050
         DYNAMIC_TYPE_BIGINT);
16051
     if (t == NULL) {
16052
         err = MP_MEM;
16053
     }
16054
#endif
16055
    if (err == MP_OKAY) {
16056
        /* Column 0: the word received in h is stored as result digit 0 and
         * the word received in l is carried on as the accumulator's low
         * word, so the (h, l) argument order here is deliberate. */
        SP_ASM_SQR(h, l, a->dp[0]);
16057
        t[0] = h;
16058
        h = 0;
16059
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
16060
        t[1] = l;
16061
        l = h;
16062
        h = o;
16063
        o = 0;
16064
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
16065
        SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
16066
        t[2] = l;
16067
        l = h;
16068
        h = o;
16069
        o = 0;
16070
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
16071
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
16072
        t[3] = l;
16073
        l = h;
16074
        h = o;
16075
        o = 0;
16076
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
16077
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
16078
        SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
16079
        t[4] = l;
16080
        l = h;
16081
        h = o;
16082
        o = 0;
16083
        /* From column 5 up to column 41 the cross products are first summed
         * in tl/th/to and then added to l/h/o in one step. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
16084
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
16085
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
16086
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16087
        t[5] = l;
16088
        l = h;
16089
        h = o;
16090
        o = 0;
16091
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
16092
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
16093
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
16094
        SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
16095
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16096
        t[6] = l;
16097
        l = h;
16098
        h = o;
16099
        o = 0;
16100
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
16101
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
16102
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
16103
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
16104
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16105
        t[7] = l;
16106
        l = h;
16107
        h = o;
16108
        o = 0;
16109
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
16110
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
16111
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
16112
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
16113
        SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
16114
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16115
        t[8] = l;
16116
        l = h;
16117
        h = o;
16118
        o = 0;
16119
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
16120
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
16121
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
16122
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
16123
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
16124
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16125
        t[9] = l;
16126
        l = h;
16127
        h = o;
16128
        o = 0;
16129
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
16130
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
16131
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
16132
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
16133
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
16134
        SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
16135
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16136
        t[10] = l;
16137
        l = h;
16138
        h = o;
16139
        o = 0;
16140
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
16141
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
16142
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
16143
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
16144
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
16145
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
16146
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16147
        t[11] = l;
16148
        l = h;
16149
        h = o;
16150
        o = 0;
16151
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
16152
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
16153
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
16154
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
16155
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
16156
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
16157
        SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
16158
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16159
        t[12] = l;
16160
        l = h;
16161
        h = o;
16162
        o = 0;
16163
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
16164
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
16165
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
16166
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
16167
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
16168
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
16169
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
16170
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16171
        t[13] = l;
16172
        l = h;
16173
        h = o;
16174
        o = 0;
16175
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
16176
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
16177
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
16178
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
16179
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
16180
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
16181
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
16182
        SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
16183
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16184
        t[14] = l;
16185
        l = h;
16186
        h = o;
16187
        o = 0;
16188
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
16189
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
16190
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
16191
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
16192
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
16193
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
16194
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
16195
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
16196
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16197
        t[15] = l;
16198
        l = h;
16199
        h = o;
16200
        o = 0;
16201
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[16]);
16202
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[15]);
16203
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
16204
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
16205
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
16206
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
16207
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
16208
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
16209
        SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
16210
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16211
        t[16] = l;
16212
        l = h;
16213
        h = o;
16214
        o = 0;
16215
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[17]);
16216
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[16]);
16217
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[15]);
16218
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
16219
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
16220
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
16221
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
16222
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
16223
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
16224
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16225
        t[17] = l;
16226
        l = h;
16227
        h = o;
16228
        o = 0;
16229
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[18]);
16230
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[17]);
16231
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[16]);
16232
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[15]);
16233
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
16234
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
16235
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
16236
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
16237
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
16238
        SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
16239
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16240
        t[18] = l;
16241
        l = h;
16242
        h = o;
16243
        o = 0;
16244
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[19]);
16245
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[18]);
16246
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[17]);
16247
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[16]);
16248
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[15]);
16249
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
16250
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
16251
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
16252
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
16253
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
16254
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16255
        t[19] = l;
16256
        l = h;
16257
        h = o;
16258
        o = 0;
16259
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[20]);
16260
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[19]);
16261
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[18]);
16262
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[17]);
16263
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[16]);
16264
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[15]);
16265
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
16266
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
16267
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
16268
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
16269
        SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
16270
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16271
        t[20] = l;
16272
        l = h;
16273
        h = o;
16274
        o = 0;
16275
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[21]);
16276
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[20]);
16277
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[19]);
16278
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[18]);
16279
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[17]);
16280
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[16]);
16281
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[15]);
16282
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
16283
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
16284
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
16285
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
16286
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16287
        t[21] = l;
16288
        l = h;
16289
        h = o;
16290
        o = 0;
16291
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[22]);
16292
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[21]);
16293
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[20]);
16294
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[19]);
16295
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[18]);
16296
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[17]);
16297
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[16]);
16298
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[15]);
16299
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
16300
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
16301
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
16302
        SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
16303
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16304
        t[22] = l;
16305
        l = h;
16306
        h = o;
16307
        o = 0;
16308
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[23]);
16309
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[22]);
16310
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[21]);
16311
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[20]);
16312
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[19]);
16313
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[18]);
16314
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[17]);
16315
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[16]);
16316
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[15]);
16317
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
16318
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
16319
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
16320
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16321
        t[23] = l;
16322
        l = h;
16323
        h = o;
16324
        o = 0;
16325
        /* Columns 24 and above: the lower index of each pair now starts above
         * 0, and the result digits go directly into r->dp instead of t. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[23]);
16326
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[22]);
16327
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[21]);
16328
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[20]);
16329
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[19]);
16330
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[18]);
16331
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[17]);
16332
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[16]);
16333
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[15]);
16334
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
16335
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
16336
        SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
16337
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16338
        r->dp[24] = l;
16339
        l = h;
16340
        h = o;
16341
        o = 0;
16342
        SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[23]);
16343
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[22]);
16344
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[21]);
16345
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[20]);
16346
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[19]);
16347
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[18]);
16348
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[17]);
16349
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[16]);
16350
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[15]);
16351
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
16352
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
16353
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16354
        r->dp[25] = l;
16355
        l = h;
16356
        h = o;
16357
        o = 0;
16358
        SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[23]);
16359
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[22]);
16360
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[21]);
16361
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[20]);
16362
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[19]);
16363
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[18]);
16364
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[17]);
16365
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[16]);
16366
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[15]);
16367
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[14]);
16368
        SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
16369
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16370
        r->dp[26] = l;
16371
        l = h;
16372
        h = o;
16373
        o = 0;
16374
        SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[23]);
16375
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[22]);
16376
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[21]);
16377
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[20]);
16378
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[19]);
16379
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[18]);
16380
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[17]);
16381
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[16]);
16382
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[15]);
16383
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[14]);
16384
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16385
        r->dp[27] = l;
16386
        l = h;
16387
        h = o;
16388
        o = 0;
16389
        SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[23]);
16390
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[22]);
16391
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[21]);
16392
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[20]);
16393
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[19]);
16394
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[18]);
16395
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[17]);
16396
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[16]);
16397
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[15]);
16398
        SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
16399
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16400
        r->dp[28] = l;
16401
        l = h;
16402
        h = o;
16403
        o = 0;
16404
        SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[23]);
16405
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[22]);
16406
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[21]);
16407
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[20]);
16408
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[19]);
16409
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[18]);
16410
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[17]);
16411
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[16]);
16412
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[15]);
16413
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16414
        r->dp[29] = l;
16415
        l = h;
16416
        h = o;
16417
        o = 0;
16418
        SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[23]);
16419
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[22]);
16420
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[21]);
16421
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[20]);
16422
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[19]);
16423
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[18]);
16424
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[17]);
16425
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[16]);
16426
        SP_ASM_SQR_ADD(l, h, o, a->dp[15]);
16427
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16428
        r->dp[30] = l;
16429
        l = h;
16430
        h = o;
16431
        o = 0;
16432
        SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[23]);
16433
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[22]);
16434
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[21]);
16435
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[20]);
16436
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[19]);
16437
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[18]);
16438
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[17]);
16439
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[16]);
16440
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16441
        r->dp[31] = l;
16442
        l = h;
16443
        h = o;
16444
        o = 0;
16445
        SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[23]);
16446
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[22]);
16447
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[21]);
16448
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[20]);
16449
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[19]);
16450
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[18]);
16451
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[17]);
16452
        SP_ASM_SQR_ADD(l, h, o, a->dp[16]);
16453
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16454
        r->dp[32] = l;
16455
        l = h;
16456
        h = o;
16457
        o = 0;
16458
        SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[23]);
16459
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[22]);
16460
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[21]);
16461
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[20]);
16462
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[19]);
16463
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[18]);
16464
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[17]);
16465
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16466
        r->dp[33] = l;
16467
        l = h;
16468
        h = o;
16469
        o = 0;
16470
        SP_ASM_MUL_SET(tl, th, to, a->dp[11], a->dp[23]);
16471
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[22]);
16472
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[21]);
16473
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[20]);
16474
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[19]);
16475
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[18]);
16476
        SP_ASM_SQR_ADD(l, h, o, a->dp[17]);
16477
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16478
        r->dp[34] = l;
16479
        l = h;
16480
        h = o;
16481
        o = 0;
16482
        SP_ASM_MUL_SET(tl, th, to, a->dp[12], a->dp[23]);
16483
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[22]);
16484
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[21]);
16485
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[20]);
16486
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[19]);
16487
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[18]);
16488
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16489
        r->dp[35] = l;
16490
        l = h;
16491
        h = o;
16492
        o = 0;
16493
        SP_ASM_MUL_SET(tl, th, to, a->dp[13], a->dp[23]);
16494
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[22]);
16495
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[21]);
16496
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[20]);
16497
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[19]);
16498
        SP_ASM_SQR_ADD(l, h, o, a->dp[18]);
16499
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16500
        r->dp[36] = l;
16501
        l = h;
16502
        h = o;
16503
        o = 0;
16504
        SP_ASM_MUL_SET(tl, th, to, a->dp[14], a->dp[23]);
16505
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[22]);
16506
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[21]);
16507
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[20]);
16508
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[19]);
16509
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16510
        r->dp[37] = l;
16511
        l = h;
16512
        h = o;
16513
        o = 0;
16514
        SP_ASM_MUL_SET(tl, th, to, a->dp[15], a->dp[23]);
16515
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[22]);
16516
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[21]);
16517
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[20]);
16518
        SP_ASM_SQR_ADD(l, h, o, a->dp[19]);
16519
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16520
        r->dp[38] = l;
16521
        l = h;
16522
        h = o;
16523
        o = 0;
16524
        SP_ASM_MUL_SET(tl, th, to, a->dp[16], a->dp[23]);
16525
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[22]);
16526
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[21]);
16527
        SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[20]);
16528
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16529
        r->dp[39] = l;
16530
        l = h;
16531
        h = o;
16532
        o = 0;
16533
        SP_ASM_MUL_SET(tl, th, to, a->dp[17], a->dp[23]);
16534
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[22]);
16535
        SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[21]);
16536
        SP_ASM_SQR_ADD(l, h, o, a->dp[20]);
16537
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16538
        r->dp[40] = l;
16539
        l = h;
16540
        h = o;
16541
        o = 0;
16542
        SP_ASM_MUL_SET(tl, th, to, a->dp[18], a->dp[23]);
16543
        SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[22]);
16544
        SP_ASM_MUL_ADD(tl, th, to, a->dp[20], a->dp[21]);
16545
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16546
        r->dp[41] = l;
16547
        l = h;
16548
        h = o;
16549
        o = 0;
16550
        SP_ASM_MUL_ADD2(l, h, o, a->dp[19], a->dp[23]);
16551
        SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[22]);
16552
        SP_ASM_SQR_ADD(l, h, o, a->dp[21]);
16553
        r->dp[42] = l;
16554
        l = h;
16555
        h = o;
16556
        o = 0;
16557
        SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[23]);
16558
        SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[22]);
16559
        r->dp[43] = l;
16560
        l = h;
16561
        h = o;
16562
        o = 0;
16563
        SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[23]);
16564
        SP_ASM_SQR_ADD(l, h, o, a->dp[22]);
16565
        r->dp[44] = l;
16566
        l = h;
16567
        h = o;
16568
        o = 0;
16569
        SP_ASM_MUL_ADD2(l, h, o, a->dp[22], a->dp[23]);
16570
        r->dp[45] = l;
16571
        l = h;
16572
        h = o;
16573
        /* Last column: only a->dp[23] squared remains; l and h become the two
         * top result digits. */
        SP_ASM_SQR_ADD_NO(l, h, a->dp[23]);
16574
        r->dp[46] = l;
16575
        r->dp[47] = h;
16576
        /* Move the staged low 24 digits into the result. */
        XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
16577
        r->used = 48;
16578
        sp_clamp(r);
16579
    }
16580
16581
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
16582
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
16583
#endif
16584
    return err;
16585
}
16586
    #endif /* SP_INT_DIGITS >= 48 */
16587
16588
    #if SP_INT_DIGITS >= 64
16589
/* Square a and store in r. r = a * a
16590
 *
16591
 * Karatsuba implementation.
16592
 *
16593
 * @param  [in]   a  SP integer to square.
16594
 * @param  [out]  r  SP integer result.
16595
 *
16596
 * @return  MP_OKAY on success.
16597
 * @return  MP_MEM when dynamic memory allocation fails.
16598
 */
16599
static int _sp_sqr_32(const sp_int* a, sp_int* r)
16600
{
16601
    int err = MP_OKAY;
16602
    unsigned int i;
16603
    sp_int_digit l;
16604
    sp_int_digit h;
16605
    sp_int* z0;
16606
    sp_int* z1;
16607
    sp_int* z2;
16608
    sp_int_digit ca;
16609
    DECL_SP_INT(a1, 16);
16610
    DECL_SP_INT_ARRAY(z, 33, 2);
16611
16612
    ALLOC_SP_INT(a1, 16, err, NULL);
16613
    ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
16614
    if (err == MP_OKAY) {
16615
        z1 = z[0];
16616
        z2 = z[1];
16617
        z0 = r;
16618
16619
        XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
16620
        a1->used = 16;
16621
16622
        /* z2 = a1 ^ 2 */
16623
        err = _sp_sqr_16(a1, z2);
16624
    }
16625
    if (err == MP_OKAY) {
16626
        l = 0;
16627
        h = 0;
16628
        for (i = 0; i < 16; i++) {
16629
            SP_ASM_ADDC(l, h, a1->dp[i]);
16630
            SP_ASM_ADDC(l, h, a->dp[i]);
16631
            a1->dp[i] = l;
16632
            l = h;
16633
            h = 0;
16634
        }
16635
        ca = l;
16636
16637
        /* z0 = a0 ^ 2 */
16638
        err = _sp_sqr_16(a, z0);
16639
    }
16640
    if (err == MP_OKAY) {
16641
        /* z1 = (a0 + a1) ^ 2 */
16642
        err = _sp_sqr_16(a1, z1);
16643
    }
16644
    if (err == MP_OKAY) {
16645
        /* r = (z2 << 32) + (z1 - z0 - z2) << 16) + z0 */
16646
        /* r = z0 */
16647
        /* r += (z1 - z0 - z2) << 16 */
16648
        z1->dp[32] = ca;
16649
        l = 0;
16650
        if (ca) {
16651
            l = z1->dp[0 + 16];
16652
            h = 0;
16653
            SP_ASM_ADDC(l, h, a1->dp[0]);
16654
            SP_ASM_ADDC(l, h, a1->dp[0]);
16655
            z1->dp[0 + 16] = l;
16656
            l = h;
16657
            h = 0;
16658
            for (i = 1; i < 16; i++) {
16659
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
16660
                SP_ASM_ADDC(l, h, a1->dp[i]);
16661
                SP_ASM_ADDC(l, h, a1->dp[i]);
16662
                z1->dp[i + 16] = l;
16663
                l = h;
16664
                h = 0;
16665
            }
16666
        }
16667
        z1->dp[32] += l;
16668
        /* z1 = z1 - z0 - z1 */
16669
        l = z1->dp[0];
16670
        h = 0;
16671
        SP_ASM_SUBB(l, h, z0->dp[0]);
16672
        SP_ASM_SUBB(l, h, z2->dp[0]);
16673
        z1->dp[0] = l;
16674
        l = h;
16675
        h = 0;
16676
        for (i = 1; i < 32; i++) {
16677
            l += z1->dp[i];
16678
            SP_ASM_SUBB(l, h, z0->dp[i]);
16679
            SP_ASM_SUBB(l, h, z2->dp[i]);
16680
            z1->dp[i] = l;
16681
            l = h;
16682
            h = 0;
16683
        }
16684
        z1->dp[i] += l;
16685
        /* r += z1 << 16 */
16686
        l = 0;
16687
        h = 0;
16688
        for (i = 0; i < 16; i++) {
16689
            SP_ASM_ADDC(l, h, r->dp[i + 16]);
16690
            SP_ASM_ADDC(l, h, z1->dp[i]);
16691
            r->dp[i + 16] = l;
16692
            l = h;
16693
            h = 0;
16694
        }
16695
        for (; i < 33; i++) {
16696
            SP_ASM_ADDC(l, h, z1->dp[i]);
16697
            r->dp[i + 16] = l;
16698
            l = h;
16699
            h = 0;
16700
        }
16701
        /* r += z2 << 32  */
16702
        l = 0;
16703
        h = 0;
16704
        for (i = 0; i < 17; i++) {
16705
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
16706
            SP_ASM_ADDC(l, h, z2->dp[i]);
16707
            r->dp[i + 32] = l;
16708
            l = h;
16709
            h = 0;
16710
        }
16711
        for (; i < 32; i++) {
16712
            SP_ASM_ADDC(l, h, z2->dp[i]);
16713
            r->dp[i + 32] = l;
16714
            l = h;
16715
            h = 0;
16716
        }
16717
        r->used = 64;
16718
        sp_clamp(r);
16719
    }
16720
16721
    FREE_SP_INT_ARRAY(z, NULL);
16722
    FREE_SP_INT(a1, NULL);
16723
    return err;
16724
}
16725
    #endif /* SP_INT_DIGITS >= 64 */
16726
16727
    #if SP_INT_DIGITS >= 96
16728
/* Square a and store in r. r = a * a
16729
 *
16730
 * Karatsuba implementation.
 * a is split into a low half a0 (digits 0..23) and a high half a1 (digits
 * 24..47). Three 24-digit squarings, a0^2, a1^2 and (a0 + a1)^2, are combined
 * into the 96-digit result.
16731
 *
16732
 * @param  [in]   a  SP integer to square. Digits 0..47 are used.
16733
 * @param  [out]  r  SP integer result. Also holds a0^2 while calculating.
16734
 *
16735
 * @return  MP_OKAY on success.
16736
 * @return  MP_MEM when dynamic memory allocation fails.
16737
 */
16738
static int _sp_sqr_48(const sp_int* a, sp_int* r)
16739
{
16740
    int err = MP_OKAY;
16741
    unsigned int i;
16742
    sp_int_digit l;
16743
    sp_int_digit h;
16744
    sp_int* z0;
16745
    sp_int* z1;
16746
    sp_int* z2;
16747
    sp_int_digit ca;
16748
    DECL_SP_INT(a1, 24);
16749
    DECL_SP_INT_ARRAY(z, 49, 2);
16750
16751
    ALLOC_SP_INT(a1, 24, err, NULL);
16752
    ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
16753
    if (err == MP_OKAY) {
16754
        z1 = z[0];
16755
        z2 = z[1];
        /* a0^2 is calculated directly into the result. */
16756
        z0 = r;
16757
16758
        /* a1 = top 24 digits of a */
        XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
16759
        a1->used = 24;
16760
16761
        /* z2 = a1 ^ 2 */
16762
        err = _sp_sqr_24(a1, z2);
16763
    }
16764
    if (err == MP_OKAY) {
        /* a1 = a0 + a1 - low 24 digits only, carry out is kept in ca. */
16765
        l = 0;
16766
        h = 0;
16767
        for (i = 0; i < 24; i++) {
16768
            SP_ASM_ADDC(l, h, a1->dp[i]);
16769
            SP_ASM_ADDC(l, h, a->dp[i]);
16770
            a1->dp[i] = l;
16771
            l = h;
16772
            h = 0;
16773
        }
16774
        ca = l;
16775
16776
        /* z0 = a0 ^ 2 */
16777
        err = _sp_sqr_24(a, z0);
16778
    }
16779
    if (err == MP_OKAY) {
16780
        /* z1 = (a0 + a1) ^ 2 */
16781
        err = _sp_sqr_24(a1, z1);
16782
    }
16783
    if (err == MP_OKAY) {
16784
        /* r = (z2 << 48) + ((z1 - z0 - z2) << 24) + z0 - shifts in digits */
16785
        /* r = z0 - nothing to do as z0 is r */
16786
        /* r += (z1 - z0 - z2) << 24 */
        /* z1 is only the square of the low 24 digits of (a0 + a1). When the
         * addition carried out (ca is 1), add in the missing terms:
         * 2 * (low 24 digits) << 24 and 1 << 48.
         */
16787
        z1->dp[48] = ca;
16788
        l = 0;
16789
        if (ca) {
16790
            l = z1->dp[0 + 24];
16791
            h = 0;
16792
            SP_ASM_ADDC(l, h, a1->dp[0]);
16793
            SP_ASM_ADDC(l, h, a1->dp[0]);
16794
            z1->dp[0 + 24] = l;
16795
            l = h;
16796
            h = 0;
16797
            for (i = 1; i < 24; i++) {
16798
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
16799
                SP_ASM_ADDC(l, h, a1->dp[i]);
16800
                SP_ASM_ADDC(l, h, a1->dp[i]);
16801
                z1->dp[i + 24] = l;
16802
                l = h;
16803
                h = 0;
16804
            }
16805
        }
        /* Carry from doubling goes into top digit of z1. */
16806
        z1->dp[48] += l;
16807
        /* z1 = z1 - z0 - z2 */
16808
        l = z1->dp[0];
16809
        h = 0;
16810
        SP_ASM_SUBB(l, h, z0->dp[0]);
16811
        SP_ASM_SUBB(l, h, z2->dp[0]);
16812
        z1->dp[0] = l;
16813
        l = h;
16814
        h = 0;
16815
        for (i = 1; i < 48; i++) {
            /* l holds what was left in h by the previous digit. */
16816
            l += z1->dp[i];
16817
            SP_ASM_SUBB(l, h, z0->dp[i]);
16818
            SP_ASM_SUBB(l, h, z2->dp[i]);
16819
            z1->dp[i] = l;
16820
            l = h;
16821
            h = 0;
16822
        }
        /* i is 48 here - apply what is left to the top digit of z1. */
16823
        z1->dp[i] += l;
16824
        /* r += z1 << 24 */
16825
        l = 0;
16826
        h = 0;
16827
        for (i = 0; i < 24; i++) {
16828
            SP_ASM_ADDC(l, h, r->dp[i + 24]);
16829
            SP_ASM_ADDC(l, h, z1->dp[i]);
16830
            r->dp[i + 24] = l;
16831
            l = h;
16832
            h = 0;
16833
        }
        /* Digits 48..72 of r are not read - store z1 plus carry only. */
16834
        for (; i < 49; i++) {
16835
            SP_ASM_ADDC(l, h, z1->dp[i]);
16836
            r->dp[i + 24] = l;
16837
            l = h;
16838
            h = 0;
16839
        }
16840
        /* r += z2 << 48  */
16841
        l = 0;
16842
        h = 0;
        /* Digits 48..72 of r were written above - add z2 into them. */
16843
        for (i = 0; i < 25; i++) {
16844
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
16845
            SP_ASM_ADDC(l, h, z2->dp[i]);
16846
            r->dp[i + 48] = l;
16847
            l = h;
16848
            h = 0;
16849
        }
        /* Digits 73..95 of r are not read - store z2 plus carry only. */
16850
        for (; i < 48; i++) {
16851
            SP_ASM_ADDC(l, h, z2->dp[i]);
16852
            r->dp[i + 48] = l;
16853
            l = h;
16854
            h = 0;
16855
        }
16856
        r->used = 96;
16857
        sp_clamp(r);
16858
    }
16859
16860
    FREE_SP_INT_ARRAY(z, NULL);
16861
    FREE_SP_INT(a1, NULL);
16862
    return err;
16863
}
16864
    #endif /* SP_INT_DIGITS >= 96 */
16865
16866
    #if SP_INT_DIGITS >= 128
16867
/* Square a and store in r. r = a * a
16868
 *
16869
 * Karatsuba implementation.
 * a is split into a low half a0 (digits 0..31) and a high half a1 (digits
 * 32..63). Three 32-digit squarings, a0^2, a1^2 and (a0 + a1)^2, are combined
 * into the 128-digit result.
16870
 *
16871
 * @param  [in]   a  SP integer to square. Digits 0..63 are used.
16872
 * @param  [out]  r  SP integer result. Also holds a0^2 while calculating.
16873
 *
16874
 * @return  MP_OKAY on success.
16875
 * @return  MP_MEM when dynamic memory allocation fails.
16876
 */
16877
static int _sp_sqr_64(const sp_int* a, sp_int* r)
16878
{
16879
    int err = MP_OKAY;
16880
    unsigned int i;
16881
    sp_int_digit l;
16882
    sp_int_digit h;
16883
    sp_int* z0;
16884
    sp_int* z1;
16885
    sp_int* z2;
16886
    sp_int_digit ca;
16887
    DECL_SP_INT(a1, 32);
16888
    DECL_SP_INT_ARRAY(z, 65, 2);
16889
16890
    ALLOC_SP_INT(a1, 32, err, NULL);
16891
    ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
16892
    if (err == MP_OKAY) {
16893
        z1 = z[0];
16894
        z2 = z[1];
        /* a0^2 is calculated directly into the result. */
16895
        z0 = r;
16896
16897
        /* a1 = top 32 digits of a */
        XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
16898
        a1->used = 32;
16899
16900
        /* z2 = a1 ^ 2 */
16901
        err = _sp_sqr_32(a1, z2);
16902
    }
16903
    if (err == MP_OKAY) {
        /* a1 = a0 + a1 - low 32 digits only, carry out is kept in ca. */
16904
        l = 0;
16905
        h = 0;
16906
        for (i = 0; i < 32; i++) {
16907
            SP_ASM_ADDC(l, h, a1->dp[i]);
16908
            SP_ASM_ADDC(l, h, a->dp[i]);
16909
            a1->dp[i] = l;
16910
            l = h;
16911
            h = 0;
16912
        }
16913
        ca = l;
16914
16915
        /* z0 = a0 ^ 2 */
16916
        err = _sp_sqr_32(a, z0);
16917
    }
16918
    if (err == MP_OKAY) {
16919
        /* z1 = (a0 + a1) ^ 2 */
16920
        err = _sp_sqr_32(a1, z1);
16921
    }
16922
    if (err == MP_OKAY) {
16923
        /* r = (z2 << 64) + ((z1 - z0 - z2) << 32) + z0 - shifts in digits */
16924
        /* r = z0 - nothing to do as z0 is r */
16925
        /* r += (z1 - z0 - z2) << 32 */
        /* z1 is only the square of the low 32 digits of (a0 + a1). When the
         * addition carried out (ca is 1), add in the missing terms:
         * 2 * (low 32 digits) << 32 and 1 << 64.
         */
16926
        z1->dp[64] = ca;
16927
        l = 0;
16928
        if (ca) {
16929
            l = z1->dp[0 + 32];
16930
            h = 0;
16931
            SP_ASM_ADDC(l, h, a1->dp[0]);
16932
            SP_ASM_ADDC(l, h, a1->dp[0]);
16933
            z1->dp[0 + 32] = l;
16934
            l = h;
16935
            h = 0;
16936
            for (i = 1; i < 32; i++) {
16937
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
16938
                SP_ASM_ADDC(l, h, a1->dp[i]);
16939
                SP_ASM_ADDC(l, h, a1->dp[i]);
16940
                z1->dp[i + 32] = l;
16941
                l = h;
16942
                h = 0;
16943
            }
16944
        }
        /* Carry from doubling goes into top digit of z1. */
16945
        z1->dp[64] += l;
16946
        /* z1 = z1 - z0 - z2 */
16947
        l = z1->dp[0];
16948
        h = 0;
16949
        SP_ASM_SUBB(l, h, z0->dp[0]);
16950
        SP_ASM_SUBB(l, h, z2->dp[0]);
16951
        z1->dp[0] = l;
16952
        l = h;
16953
        h = 0;
16954
        for (i = 1; i < 64; i++) {
            /* l holds what was left in h by the previous digit. */
16955
            l += z1->dp[i];
16956
            SP_ASM_SUBB(l, h, z0->dp[i]);
16957
            SP_ASM_SUBB(l, h, z2->dp[i]);
16958
            z1->dp[i] = l;
16959
            l = h;
16960
            h = 0;
16961
        }
        /* i is 64 here - apply what is left to the top digit of z1. */
16962
        z1->dp[i] += l;
16963
        /* r += z1 << 32 */
16964
        l = 0;
16965
        h = 0;
16966
        for (i = 0; i < 32; i++) {
16967
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
16968
            SP_ASM_ADDC(l, h, z1->dp[i]);
16969
            r->dp[i + 32] = l;
16970
            l = h;
16971
            h = 0;
16972
        }
        /* Digits 64..96 of r are not read - store z1 plus carry only. */
16973
        for (; i < 65; i++) {
16974
            SP_ASM_ADDC(l, h, z1->dp[i]);
16975
            r->dp[i + 32] = l;
16976
            l = h;
16977
            h = 0;
16978
        }
16979
        /* r += z2 << 64  */
16980
        l = 0;
16981
        h = 0;
        /* Digits 64..96 of r were written above - add z2 into them. */
16982
        for (i = 0; i < 33; i++) {
16983
            SP_ASM_ADDC(l, h, r->dp[i + 64]);
16984
            SP_ASM_ADDC(l, h, z2->dp[i]);
16985
            r->dp[i + 64] = l;
16986
            l = h;
16987
            h = 0;
16988
        }
        /* Digits 97..127 of r are not read - store z2 plus carry only. */
16989
        for (; i < 64; i++) {
16990
            SP_ASM_ADDC(l, h, z2->dp[i]);
16991
            r->dp[i + 64] = l;
16992
            l = h;
16993
            h = 0;
16994
        }
16995
        r->used = 128;
16996
        sp_clamp(r);
16997
    }
16998
16999
    FREE_SP_INT_ARRAY(z, NULL);
17000
    FREE_SP_INT(a1, NULL);
17001
    return err;
17002
}
17003
    #endif /* SP_INT_DIGITS >= 128 */
17004
17005
    #if SP_INT_DIGITS >= 192
17006
/* Square a and store in r. r = a * a
17007
 *
17008
 * Karatsuba implementation.
 * a is split into a low half a0 (digits 0..47) and a high half a1 (digits
 * 48..95). Three 48-digit squarings, a0^2, a1^2 and (a0 + a1)^2, are combined
 * into the 192-digit result.
17009
 *
17010
 * @param  [in]   a  SP integer to square. Digits 0..95 are used.
17011
 * @param  [out]  r  SP integer result. Also holds a0^2 while calculating.
17012
 *
17013
 * @return  MP_OKAY on success.
17014
 * @return  MP_MEM when dynamic memory allocation fails.
17015
 */
17016
static int _sp_sqr_96(const sp_int* a, sp_int* r)
17017
{
17018
    int err = MP_OKAY;
17019
    unsigned int i;
17020
    sp_int_digit l;
17021
    sp_int_digit h;
17022
    sp_int* z0;
17023
    sp_int* z1;
17024
    sp_int* z2;
17025
    sp_int_digit ca;
17026
    DECL_SP_INT(a1, 48);
17027
    DECL_SP_INT_ARRAY(z, 97, 2);
17028
17029
    ALLOC_SP_INT(a1, 48, err, NULL);
17030
    ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
17031
    if (err == MP_OKAY) {
17032
        z1 = z[0];
17033
        z2 = z[1];
        /* a0^2 is calculated directly into the result. */
17034
        z0 = r;
17035
17036
        /* a1 = top 48 digits of a */
        XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
17037
        a1->used = 48;
17038
17039
        /* z2 = a1 ^ 2 */
17040
        err = _sp_sqr_48(a1, z2);
17041
    }
17042
    if (err == MP_OKAY) {
        /* a1 = a0 + a1 - low 48 digits only, carry out is kept in ca. */
17043
        l = 0;
17044
        h = 0;
17045
        for (i = 0; i < 48; i++) {
17046
            SP_ASM_ADDC(l, h, a1->dp[i]);
17047
            SP_ASM_ADDC(l, h, a->dp[i]);
17048
            a1->dp[i] = l;
17049
            l = h;
17050
            h = 0;
17051
        }
17052
        ca = l;
17053
17054
        /* z0 = a0 ^ 2 */
17055
        err = _sp_sqr_48(a, z0);
17056
    }
17057
    if (err == MP_OKAY) {
17058
        /* z1 = (a0 + a1) ^ 2 */
17059
        err = _sp_sqr_48(a1, z1);
17060
    }
17061
    if (err == MP_OKAY) {
17062
        /* r = (z2 << 96) + ((z1 - z0 - z2) << 48) + z0 - shifts in digits */
17063
        /* r = z0 - nothing to do as z0 is r */
17064
        /* r += (z1 - z0 - z2) << 48 */
        /* z1 is only the square of the low 48 digits of (a0 + a1). When the
         * addition carried out (ca is 1), add in the missing terms:
         * 2 * (low 48 digits) << 48 and 1 << 96.
         */
17065
        z1->dp[96] = ca;
17066
        l = 0;
17067
        if (ca) {
17068
            l = z1->dp[0 + 48];
17069
            h = 0;
17070
            SP_ASM_ADDC(l, h, a1->dp[0]);
17071
            SP_ASM_ADDC(l, h, a1->dp[0]);
17072
            z1->dp[0 + 48] = l;
17073
            l = h;
17074
            h = 0;
17075
            for (i = 1; i < 48; i++) {
17076
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
17077
                SP_ASM_ADDC(l, h, a1->dp[i]);
17078
                SP_ASM_ADDC(l, h, a1->dp[i]);
17079
                z1->dp[i + 48] = l;
17080
                l = h;
17081
                h = 0;
17082
            }
17083
        }
        /* Carry from doubling goes into top digit of z1. */
17084
        z1->dp[96] += l;
17085
        /* z1 = z1 - z0 - z2 */
17086
        l = z1->dp[0];
17087
        h = 0;
17088
        SP_ASM_SUBB(l, h, z0->dp[0]);
17089
        SP_ASM_SUBB(l, h, z2->dp[0]);
17090
        z1->dp[0] = l;
17091
        l = h;
17092
        h = 0;
17093
        for (i = 1; i < 96; i++) {
            /* l holds what was left in h by the previous digit. */
17094
            l += z1->dp[i];
17095
            SP_ASM_SUBB(l, h, z0->dp[i]);
17096
            SP_ASM_SUBB(l, h, z2->dp[i]);
17097
            z1->dp[i] = l;
17098
            l = h;
17099
            h = 0;
17100
        }
        /* i is 96 here - apply what is left to the top digit of z1. */
17101
        z1->dp[i] += l;
17102
        /* r += z1 << 48 */
17103
        l = 0;
17104
        h = 0;
17105
        for (i = 0; i < 48; i++) {
17106
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
17107
            SP_ASM_ADDC(l, h, z1->dp[i]);
17108
            r->dp[i + 48] = l;
17109
            l = h;
17110
            h = 0;
17111
        }
        /* Digits 96..144 of r are not read - store z1 plus carry only. */
17112
        for (; i < 97; i++) {
17113
            SP_ASM_ADDC(l, h, z1->dp[i]);
17114
            r->dp[i + 48] = l;
17115
            l = h;
17116
            h = 0;
17117
        }
17118
        /* r += z2 << 96  */
17119
        l = 0;
17120
        h = 0;
        /* Digits 96..144 of r were written above - add z2 into them. */
17121
        for (i = 0; i < 49; i++) {
17122
            SP_ASM_ADDC(l, h, r->dp[i + 96]);
17123
            SP_ASM_ADDC(l, h, z2->dp[i]);
17124
            r->dp[i + 96] = l;
17125
            l = h;
17126
            h = 0;
17127
        }
        /* Digits 145..191 of r are not read - store z2 plus carry only. */
17128
        for (; i < 96; i++) {
17129
            SP_ASM_ADDC(l, h, z2->dp[i]);
17130
            r->dp[i + 96] = l;
17131
            l = h;
17132
            h = 0;
17133
        }
17134
        r->used = 192;
17135
        sp_clamp(r);
17136
    }
17137
17138
    FREE_SP_INT_ARRAY(z, NULL);
17139
    FREE_SP_INT(a1, NULL);
17140
    return err;
17141
}
17142
    #endif /* SP_INT_DIGITS >= 192 */
17143
17144
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
17145
#endif /* !WOLFSSL_SP_SMALL */
17146
17147
/* Square a and store in r. r = a * a
17148
 *
 * When compiled in, an implementation specific to the number of digits in a
 * is used. Otherwise the generic _sp_sqr() is called.
 * With WOLFSSL_SP_MATH and WOLFSSL_SP_SMALL, sp_mul(a, a, r) is used instead.
 *
17149
 * @param  [in]   a  SP integer to square.
17150
 * @param  [out]  r  SP integer result.
17151
 *
17152
 * @return  MP_OKAY on success.
17153
 * @return  MP_VAL when a or r is NULL, or r does not have enough digits to
17154
 *          hold twice the number of digits used in a.
17155
 * @return  MP_MEM when dynamic memory allocation fails.
17156
 */
17157
int sp_sqr(const sp_int* a, sp_int* r)
17158
7.70M
{
17159
#if defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_SP_SMALL)
17160
    return sp_mul(a, a, r);
17161
#else
17162
7.70M
    int err = MP_OKAY;
17163
17164
7.70M
    /* Validate parameters. */
    if ((a == NULL) || (r == NULL)) {
17165
0
        err = MP_VAL;
17166
0
    }
17167
    /* Result can have twice as many digits as a - r must have room for it. */
17168
7.70M
    if ((err == MP_OKAY) && (a->used * 2 > r->size)) {
17169
24
        err = MP_VAL;
17170
24
    }
17171
17172
#if 0
17173
    if (err == MP_OKAY) {
17174
        sp_print(a, "a");
17175
    }
17176
#endif
17177
17178
7.70M
    if (err == MP_OKAY) {
17179
7.70M
        /* Square of zero is zero. */
        if (a->used == 0) {
17180
58.6k
            _sp_zero(r);
17181
58.6k
        }
17182
7.64M
    else
17183
7.64M
#ifndef WOLFSSL_SP_SMALL
17184
7.64M
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
17185
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
17186
        if (a->used == 4) {
17187
            err = _sp_sqr_4(a, r);
17188
        }
17189
        else
17190
#endif /* SP_WORD_SIZE == 64 */
17191
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
17192
#ifdef SQR_MUL_ASM
17193
        if (a->used == 6) {
17194
            err = _sp_sqr_6(a, r);
17195
        }
17196
        else
17197
#endif /* SQR_MUL_ASM */
17198
#endif /* SP_WORD_SIZE == 64 */
17199
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
17200
#ifdef SQR_MUL_ASM
17201
        if (a->used == 8) {
17202
            err = _sp_sqr_8(a, r);
17203
        }
17204
        else
17205
#endif /* SQR_MUL_ASM */
17206
#endif /* SP_WORD_SIZE == 32 */
17207
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
17208
#ifdef SQR_MUL_ASM
17209
        if (a->used == 12) {
17210
            err = _sp_sqr_12(a, r);
17211
        }
17212
        else
17213
#endif /* SQR_MUL_ASM */
17214
#endif /* SP_WORD_SIZE == 32 */
17215
7.64M
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
17216
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
17217
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
17218
    (SP_WORD_SIZE == 64)))
17219
    #if SP_INT_DIGITS >= 32
17220
        if (a->used == 16) {
17221
            err = _sp_sqr_16(a, r);
17222
        }
17223
        else
17224
    #endif /* SP_INT_DIGITS >= 32 */
17225
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
17226
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
17227
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
17228
    #if SP_INT_DIGITS >= 48
17229
        if (a->used == 24) {
17230
            err = _sp_sqr_24(a, r);
17231
        }
17232
        else
17233
    #endif /* SP_INT_DIGITS >= 48 */
17234
    #if SP_INT_DIGITS >= 64
17235
        if (a->used == 32) {
17236
            err = _sp_sqr_32(a, r);
17237
        }
17238
        else
17239
    #endif /* SP_INT_DIGITS >= 64 */
17240
    #if SP_INT_DIGITS >= 96
17241
        if (a->used == 48) {
17242
            err = _sp_sqr_48(a, r);
17243
        }
17244
        else
17245
    #endif /* SP_INT_DIGITS >= 96 */
17246
    #if SP_INT_DIGITS >= 128
17247
        if (a->used == 64) {
17248
            err = _sp_sqr_64(a, r);
17249
        }
17250
        else
17251
    #endif /* SP_INT_DIGITS >= 128 */
17252
    #if SP_INT_DIGITS >= 192
17253
        if (a->used == 96) {
17254
            err = _sp_sqr_96(a, r);
17255
        }
17256
        else
17257
    #endif /* SP_INT_DIGITS >= 192 */
17258
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
17259
7.64M
#endif /* !WOLFSSL_SP_SMALL */
17260
7.64M
        {
17261
7.64M
            /* No implementation specific to the size of a - use generic. */
            err = _sp_sqr(a, r);
17262
7.64M
        }
17263
7.70M
    }
17264
17265
7.70M
#ifdef WOLFSSL_SP_INT_NEGATIVE
17266
7.70M
    /* A square is never negative. */
    if (err == MP_OKAY) {
17267
7.70M
        r->sign = MP_ZPOS;
17268
7.70M
    }
17269
7.70M
#endif
17270
17271
#if 0
17272
    if (err == MP_OKAY) {
17273
        sp_print(r, "rsqr");
17274
    }
17275
#endif
17276
17277
7.70M
    return err;
17278
7.70M
#endif /* WOLFSSL_SP_MATH && WOLFSSL_SP_SMALL */
17279
7.70M
}
17280
/* END SP_SQR implementations */
17281
17282
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
17283
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
17284
17285
#if defined(WOLFSSL_SP_MATH_ALL) || \
17286
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
17287
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || defined(HAVE_ECC)
17288
/* Square a mod m and store in r: r = (a * a) mod m
17289
 *
 * The square is calculated into a temporary so that m is not modified before
 * the reduction. sp_sqrmod() calls this when r is the same pointer as m.
 *
17290
 * @param  [in]   a  SP integer to square.
17291
 * @param  [in]   m  SP integer that is the modulus.
17292
 * @param  [out]  r  SP integer result.
17293
 *
17294
 * @return  MP_OKAY on success.
17295
 * @return  MP_MEM when dynamic memory allocation fails.
17296
 */
17297
static int _sp_sqrmod(const sp_int* a, const sp_int* m, sp_int* r)
17298
97
{
17299
97
    int err = MP_OKAY;
17300
    /* Create temporary for squaring result - twice the digits of a. */
17301
97
    DECL_SP_INT(t, a->used * 2);
17302
17303
97
    ALLOC_SP_INT(t, a->used * 2, err, NULL);
17304
97
    if (err == MP_OKAY) {
17305
79
        err = sp_init_size(t, a->used * 2U);
17306
79
    }
17307
17308
    /* Square into temporary and reduce into r. */
17309
97
    if (err == MP_OKAY) {
17310
66
        err = sp_sqr(a, t);
17311
66
    }
17312
97
    if (err == MP_OKAY) {
17313
54
        err = sp_mod(t, m, r);
17314
54
    }
17315
17316
    /* Dispose of an allocated SP int - done on error paths too. */
17317
97
    FREE_SP_INT(t, NULL);
17318
97
    return err;
17319
97
}
17320
17321
/* Square a mod m and store in r: r = (a * a) mod m
17322
 *
 * r is used to hold the square before reduction unless r is the same pointer
 * as m, in which case a temporary is used.
 *
17323
 * @param  [in]   a  SP integer to square.
17324
 * @param  [in]   m  SP integer that is the modulus.
17325
 * @param  [out]  r  SP integer result.
17326
 *
17327
 * @return  MP_OKAY on success.
17328
 * @return  MP_VAL when a, m or r is NULL; or m is 0; or a squared is too big
17329
 *          for fixed data length.
17330
 * @return  MP_MEM when dynamic memory allocation fails.
17331
 */
17332
int sp_sqrmod(const sp_int* a, const sp_int* m, sp_int* r)
17333
6.67M
{
17334
6.67M
    int err = MP_OKAY;
17335
17336
    /* Validate parameters. */
17337
6.67M
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
17338
0
        err = MP_VAL;
17339
0
    }
17340
    /* Ensure r has space for intermediate result: twice the digits of a. */
17341
6.67M
    if ((err == MP_OKAY) && (r != m) && (a->used * 2 > r->size)) {
17342
36
        err = MP_VAL;
17343
36
    }
17344
    /* Temporary is used when r is m - ensure a squared is not bigger than
     * the maximum number of digits.
     */
17345
6.67M
    if ((err == MP_OKAY) && (r == m) && (a->used * 2 > SP_INT_DIGITS)) {
17346
8
        err = MP_VAL;
17347
8
    }
17348
17349
    /* Use r as intermediate result if not same as pointer m which is needed
17350
     * after first intermediate result.
17351
     */
17352
6.67M
    if ((err == MP_OKAY) && (r != m)) {
17353
        /* Square into r and reduce r in place. */
17354
6.67M
        err = sp_sqr(a, r);
17355
6.67M
        if (err == MP_OKAY) {
17356
6.67M
            err = sp_mod(r, m, r);
17357
6.67M
        }
17358
6.67M
    }
17359
141
    else if (err == MP_OKAY) {
17360
        /* r is m - do operation with temporary. */
17361
97
        err = _sp_sqrmod(a, m, r);
17362
97
    }
17363
17364
6.67M
    return err;
17365
6.67M
}
17366
#endif /* WOLFSSL_SP_MATH_ALL || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY &&
        * !WOLFSSL_RSA_PUBLIC_ONLY) || !NO_DH || HAVE_ECC */
17367
17368
/**********************
17369
 * Montgomery functions
17370
 **********************/
17371
17372
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
17373
    defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
17374
    defined(OPENSSL_ALL)
17375
/* Reduce a number in Montgomery form.
17376
 *
17377
 * Assumes a and m are not NULL and m is not 0.
17378
 *
17379
 * DigitMask(a,i) := mask out the 'i'th digit in place.
17380
 *
17381
 * Algorithm:
17382
 *  1. mask = (1 << (NumBits(m) % WORD_SIZE)) - 1
17383
 *  2. For i = 0..NumDigits(m)-1
17384
 *   2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK
17385
 *   2.2. If i == NumDigits(m)-1 and mask != 0 then mu &= mask
17386
 *   2.3. a += mu * DigitMask(m, 0)
17387
 *   2.4. For j = 1 up to NumDigits(m)-2
17388
 *    2.4.1 a += mu * DigitMask(m, j)
17389
 *   2.5 a += mu * DigitMask(m, NumDigits(m)-1))
17390
 * 3. a >>= NumBits(m)
17391
 * 4. a = a % m
17392
 *
17393
 * @param  [in,out]  a   SP integer to Montgomery reduce.
17394
 * @param  [in]      m   SP integer that is the modulus.
17395
 * @param  [in]      mp  SP integer digit that is the bottom digit of inv(-m).
17396
 * @param  [in]      ct  Indicates operation must be constant time.
17397
 *
17398
 * @return  MP_OKAY on success.
17399
 */
17400
static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct)
17401
205M
{
17402
205M
#if !defined(SQR_MUL_ASM)
17403
205M
    unsigned int i;
17404
205M
    int bits;
17405
205M
    sp_int_word w;
17406
205M
    sp_int_digit mu;
17407
17408
#if 0
17409
    sp_print(a, "a");
17410
    sp_print(m, "m");
17411
#endif
17412
17413
    /* Count bits in modulus. */
17414
205M
    bits = sp_count_bits(m);
17415
17416
    /* Adding numbers into m->used * 2 digits - zero out unused digits. */
17417
205M
#ifndef WOLFSSL_NO_CT_OPS
17418
205M
    if (ct) {
17419
528k
        for (i = 0; i < (unsigned int)m->used * 2; i++) {
17420
519k
            a->dp[i] &=
17421
519k
                (sp_int_digit)
17422
519k
                (sp_int_sdigit)ctMaskIntGTE((int)(a->used-1), (int)i);
17423
519k
        }
17424
8.41k
    }
17425
205M
    else
17426
205M
#endif /* !WOLFSSL_NO_CT_OPS */
17427
205M
    {
17428
326M
        for (i = a->used; i < (unsigned int)m->used * 2; i++) {
17429
121M
            a->dp[i] = 0;
17430
121M
        }
17431
205M
    }
17432
17433
    /* Special case when modulus is 1 digit or less. */
17434
205M
    if (m->used <= 1) {
17435
        /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
17436
11.1k
        mu = mp * a->dp[0];
17437
        /* a += mu * m */
17438
11.1k
        w = a->dp[0];
17439
11.1k
        w += (sp_int_word)mu * m->dp[0];
17440
11.1k
        a->dp[0] = (sp_int_digit)w;
17441
11.1k
        w >>= SP_WORD_SIZE;
17442
11.1k
        w += a->dp[1];
17443
11.1k
        a->dp[1] = (sp_int_digit)w;
17444
11.1k
        w >>= SP_WORD_SIZE;
17445
11.1k
        a->dp[2] = (sp_int_digit)w;
17446
11.1k
        a->used = 3;
17447
        /* mp is SP_WORD_SIZE */
17448
11.1k
        bits = SP_WORD_SIZE;
17449
11.1k
    }
17450
205M
    else {
17451
        /* 1. mask = (1 << (NumBits(m) % WORD_SIZE)) - 1
17452
         *    Mask when last digit of modulus doesn't have highest bit set.
17453
         */
17454
205M
        volatile sp_int_digit mask = (sp_int_digit)
17455
205M
            (((sp_int_digit)1 << (bits & (SP_WORD_SIZE - 1))) - 1);
17456
        /* Overflow. */
17457
205M
        sp_int_word o = 0;
17458
17459
        /* 2. For i = 0..NumDigits(m)-1 */
17460
2.21G
        for (i = 0; i < m->used; i++) {
17461
2.00G
            unsigned int j;
17462
17463
            /* 2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK */
17464
2.00G
            mu = mp * a->dp[i];
17465
            /* 2.2. If i == NumDigits(m)-1 and mask != 0 then mu &= mask */
17466
2.00G
            if ((i == (unsigned int)m->used - 1) && (mask != 0)) {
17467
86.5M
                mu &= mask;
17468
86.5M
            }
17469
17470
            /* 2.3. a += mu * DigitMask(m, 0) */
17471
2.00G
            w = a->dp[i];
17472
2.00G
            w += (sp_int_word)mu * m->dp[0];
17473
2.00G
            a->dp[i] = (sp_int_digit)w;
17474
2.00G
            w >>= SP_WORD_SIZE;
17475
            /* 2.4. For j = 1 up to NumDigits(m)-2 */
17476
42.6G
            for (j = 1; j < (unsigned int)m->used - 1; j++) {
17477
                /* 2.4.1 a += mu * DigitMask(m, j) */
17478
40.6G
                w += a->dp[i + j];
17479
40.6G
                w += (sp_int_word)mu * m->dp[j];
17480
40.6G
                a->dp[i + j] = (sp_int_digit)w;
17481
40.6G
                w >>= SP_WORD_SIZE;
17482
40.6G
            }
17483
            /* Handle overflow. */
17484
2.00G
            w += o;
17485
2.00G
            w += a->dp[i + j];
17486
2.00G
            o = (sp_int_digit)(w >> SP_WORD_SIZE);
17487
            /* 2.5 a += mu * DigitMask(m, NumDigits(m)-1)) */
17488
2.00G
            w = ((sp_int_word)mu * m->dp[j]) + (sp_int_digit)w;
17489
2.00G
            a->dp[i + j] = (sp_int_digit)w;
17490
2.00G
            w >>= SP_WORD_SIZE;
17491
2.00G
            o += w;
17492
2.00G
        }
17493
        /* Handle overflow. */
17494
205M
        o += a->dp[m->used * 2 - 1];
17495
205M
        a->dp[m->used * 2 - 1] = (sp_int_digit)o;
17496
205M
        o >>= SP_WORD_SIZE;
17497
205M
        a->dp[m->used * 2] = (sp_int_digit)o;
17498
205M
        a->used = (sp_size_t)(m->used * 2 + 1);
17499
205M
    }
17500
17501
205M
    if (!ct) {
17502
        /* Remove leading zeros. */
17503
205M
        sp_clamp(a);
17504
        /* 3. a >>= NumBits(m) */
17505
205M
        (void)sp_rshb(a, bits, a);
17506
        /* 4. a = a mod m */
17507
205M
        if (_sp_cmp_abs(a, m) != MP_LT) {
17508
56.5M
            _sp_sub_off(a, m, a, 0);
17509
56.5M
        }
17510
205M
    }
17511
8.41k
    else {
17512
        /* 3. a >>= NumBits(m) */
17513
8.41k
        (void)sp_rshb(a, bits, a);
17514
        /* Constant time clamping. */
17515
8.41k
        sp_clamp_ct(a);
17516
17517
        /* 4. a = a mod m
17518
         * Always subtract but at a too high offset if a is less than m.
17519
         */
17520
8.41k
        _sp_submod_ct(a, m, m, m->used + 1U, a);
17521
8.41k
    }
17522
17523
17524
#if 0
17525
    sp_print(a, "rr");
17526
#endif
17527
17528
205M
    return MP_OKAY;
17529
#else /* !SQR_MUL_ASM */
17530
    unsigned int i;
17531
    unsigned int j;
17532
    int bits;
17533
    sp_int_digit mu;
17534
    sp_int_digit o;
17535
    volatile sp_int_digit mask;
17536
17537
#if 0
17538
    sp_print(a, "a");
17539
    sp_print(m, "m");
17540
#endif
17541
17542
    bits = sp_count_bits(m);
17543
    mask = ((sp_int_digit)1 << (bits & (SP_WORD_SIZE - 1))) - 1;
17544
17545
#ifndef WOLFSSL_NO_CT_OPS
17546
    if (ct) {
17547
        for (i = 0; i < (unsigned int)m->used * 2; i++) {
17548
            a->dp[i] &=
17549
                (sp_int_digit)
17550
                (sp_int_sdigit)ctMaskIntGTE((int)(a->used-1), (int)i);
17551
        }
17552
    }
17553
    else
17554
#endif
17555
    {
17556
        for (i = a->used; i < (unsigned int)m->used * 2; i++) {
17557
            a->dp[i] = 0;
17558
        }
17559
    }
17560
17561
    if (m->used <= 1) {
17562
        sp_int_digit l;
17563
        sp_int_digit h;
17564
17565
        /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
17566
        mu = mp * a->dp[0];
17567
        /* a += mu * m */
17568
        l = a->dp[0];
17569
        h = 0;
17570
        SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
17571
        a->dp[0] = l;
17572
        l = h;
17573
        h = 0;
17574
        SP_ASM_ADDC(l, h, a->dp[1]);
17575
        a->dp[1] = l;
17576
        a->dp[2] = h;
17577
        a->used = (sp_size_t)(m->used * 2 + 1);
17578
        /* mp is SP_WORD_SIZE */
17579
        bits = SP_WORD_SIZE;
17580
    }
17581
#if !defined(WOLFSSL_SP_MATH) && defined(HAVE_ECC)
17582
#if SP_WORD_SIZE == 64
17583
#if SP_INT_DIGITS >= 8
17584
    else if ((m->used == 4) && (mask == 0)) {
17585
        sp_int_digit l;
17586
        sp_int_digit h;
17587
        sp_int_digit o2;
17588
17589
        l = 0;
17590
        h = 0;
17591
        o = 0;
17592
        o2 = 0;
17593
        /* For i = 0..NumDigits(m)-1 */
17594
        for (i = 0; i < 4; i++) {
17595
            /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
17596
            mu = mp * a->dp[0];
17597
            l = a->dp[0];
17598
            /* a = (a + mu * m) >> WORD_SIZE */
17599
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
17600
            l = h;
17601
            h = 0;
17602
            SP_ASM_ADDC(l, h, a->dp[1]);
17603
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
17604
            a->dp[0] = l;
17605
            l = h;
17606
            h = 0;
17607
            SP_ASM_ADDC(l, h, a->dp[2]);
17608
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
17609
            a->dp[1] = l;
17610
            l = h;
17611
            h = o2;
17612
            o2 = 0;
17613
            SP_ASM_ADDC_REG(l, h, o);
17614
            SP_ASM_ADDC(l, h, a->dp[i + 3]);
17615
            SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[3]);
17616
            a->dp[2] = l;
17617
            o = h;
17618
            l = h;
17619
            h = 0;
17620
        }
17621
        /* Handle overflow. */
17622
        SP_ASM_ADDC(l, o2, a->dp[7]);
17623
        a->dp[3] = l;
17624
        a->dp[4] = o2;
17625
        a->used = 5;
17626
17627
        /* Remove leading zeros. */
17628
        sp_clamp(a);
17629
17630
        /* a = a mod m */
17631
        if (_sp_cmp_abs(a, m) != MP_LT) {
17632
            _sp_sub_off(a, m, a, 0);
17633
        }
17634
17635
        return MP_OKAY;
17636
    }
17637
#endif /* SP_INT_DIGITS >= 8 */
17638
#if SP_INT_DIGITS >= 12
17639
    else if ((m->used == 6) && (mask == 0)) {
17640
        sp_int_digit l;
17641
        sp_int_digit h;
17642
        sp_int_digit o2;
17643
17644
        l = 0;
17645
        h = 0;
17646
        o = 0;
17647
        o2 = 0;
17648
        /* For i = 0..NumDigits(m)-1 */
17649
        for (i = 0; i < 6; i++) {
17650
            /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
17651
            mu = mp * a->dp[0];
17652
            l = a->dp[0];
17653
            /* a = (a + mu * m) >> WORD_SIZE */
17654
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
17655
            l = h;
17656
            h = 0;
17657
            SP_ASM_ADDC(l, h, a->dp[1]);
17658
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
17659
            a->dp[0] = l;
17660
            l = h;
17661
            h = 0;
17662
            SP_ASM_ADDC(l, h, a->dp[2]);
17663
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
17664
            a->dp[1] = l;
17665
            l = h;
17666
            h = 0;
17667
            SP_ASM_ADDC(l, h, a->dp[3]);
17668
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[3]);
17669
            a->dp[2] = l;
17670
            l = h;
17671
            h = 0;
17672
            SP_ASM_ADDC(l, h, a->dp[4]);
17673
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[4]);
17674
            a->dp[3] = l;
17675
            l = h;
17676
            h = o2;
17677
            o2 = 0;
17678
            SP_ASM_ADDC_REG(l, h, o);
17679
            SP_ASM_ADDC(l, h, a->dp[i + 5]);
17680
            SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[5]);
17681
            a->dp[4] = l;
17682
            o = h;
17683
            l = h;
17684
            h = 0;
17685
        }
17686
        /* Handle overflow. */
17687
        SP_ASM_ADDC(l, o2, a->dp[11]);
17688
        a->dp[5] = l;
17689
        a->dp[6] = o2;
17690
        a->used = 7;
17691
17692
        /* Remove leading zeros. */
17693
        sp_clamp(a);
17694
17695
        /* a = a mod m */
17696
        if (_sp_cmp_abs(a, m) != MP_LT) {
17697
            _sp_sub_off(a, m, a, 0);
17698
        }
17699
17700
        return MP_OKAY;
17701
    }
17702
#endif /* SP_INT_DIGITS >= 12 */
17703
#elif SP_WORD_SIZE == 32
17704
    else if ((m->used <= 12) && (mask == 0)) {
17705
        sp_int_digit l;
17706
        sp_int_digit h;
17707
        sp_int_digit o2;
17708
        sp_int_digit* ad;
17709
        const sp_int_digit* md;
17710
17711
        o = 0;
17712
        o2 = 0;
17713
        ad = a->dp;
17714
        /* For i = 0..NumDigits(m)-1 */
17715
        for (i = 0; i < m->used; i++) {
17716
            md = m->dp;
17717
            /*  mu = (mp * DigitMask(a, i)) & WORD_MASK */
17718
            mu = mp * ad[0];
17719
17720
            /* a = (a + mu * m, 0) >> WORD_SIZE */
17721
            l = ad[0];
17722
            h = 0;
17723
            SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17724
            l = h;
17725
            for (j = 1; j < (unsigned int)m->used - 2; j += 2) {
17726
                h = 0;
17727
                SP_ASM_ADDC(l, h, ad[j]);
17728
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17729
                ad[j - 1] = l;
17730
                l = 0;
17731
                SP_ASM_ADDC(h, l, ad[j + 1]);
17732
                SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
17733
                ad[j] = h;
17734
            }
17735
            for (; j < (unsigned int)m->used - 1; j++) {
17736
                h = 0;
17737
                SP_ASM_ADDC(l, h, ad[j]);
17738
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17739
                ad[j - 1] = l;
17740
                l = h;
17741
            }
17742
            h = o2;
17743
            o2 = 0;
17744
            SP_ASM_ADDC_REG(l, h, o);
17745
            SP_ASM_ADDC(l, h, ad[i + j]);
17746
            SP_ASM_MUL_ADD(l, h, o2, mu, *md);
17747
            ad[j - 1] = l;
17748
            o = h;
17749
        }
17750
        /* Handle overflow. */
17751
        SP_ASM_ADDC(o, o2, a->dp[m->used * 2 - 1]);
17752
        a->dp[m->used  - 1] = o;
17753
        a->dp[m->used] = o2;
17754
        a->used = m->used + 1;
17755
17756
        /* Remove leading zeros. */
17757
        sp_clamp(a);
17758
17759
        /* a = a mod m */
17760
        if (_sp_cmp_abs(a, m) != MP_LT) {
17761
            _sp_sub_off(a, m, a, 0);
17762
        }
17763
17764
        return MP_OKAY;
17765
    }
17766
#endif /* SP_WORD_SIZE == 64 | 32 */
17767
#endif /* !WOLFSSL_SP_MATH && HAVE_ECC */
17768
    else {
17769
        sp_int_digit l;
17770
        sp_int_digit h;
17771
        sp_int_digit o2;
17772
        sp_int_digit* ad;
17773
        const sp_int_digit* md;
17774
17775
        o = 0;
17776
        o2 = 0;
17777
        ad = a->dp;
17778
        /* 2. For i = 0..NumDigits(m)-1 */
17779
        for (i = 0; i < m->used; i++, ad++) {
17780
            md = m->dp;
17781
            /* 2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK */
17782
            mu = mp * ad[0];
17783
            /* 2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask */
17784
            if ((i == (unsigned int)m->used - 1) && (mask != 0)) {
17785
                mu &= mask;
17786
            }
17787
17788
            /* 2.3 a += mu * DigitMask(m, 0) */
17789
            l = ad[0];
17790
            h = 0;
17791
            SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17792
            ad[0] = l;
17793
            l = h;
17794
            /* 2.4. For j = 1 up to NumDigits(m)-2 */
17795
            for (j = 1; j < (unsigned int)m->used - 2; j += 2) {
17796
                h = 0;
17797
                /* 2.4.1. a += mu * DigitMask(m, j) */
17798
                SP_ASM_ADDC(l, h, ad[j + 0]);
17799
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17800
                ad[j + 0] = l;
17801
                l = 0;
17802
                /* 2.4.1. a += mu * DigitMask(m, j) */
17803
                SP_ASM_ADDC(h, l, ad[j + 1]);
17804
                SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
17805
                ad[j + 1] = h;
17806
            }
17807
            for (; j < (unsigned int)m->used - 1; j++) {
17808
                h = 0;
17809
                /* 2.4.1. a += mu * DigitMask(m, j) */
17810
                SP_ASM_ADDC(l, h, ad[j]);
17811
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17812
                ad[j] = l;
17813
                l = h;
17814
            }
17815
            h = o2;
17816
            o2 = 0;
17817
            SP_ASM_ADDC_REG(l, h, o);
17818
            /* 2.5 a += mu * DigitMask(m, NumDigits(m)-1) */
17819
            SP_ASM_ADDC(l, h, ad[j]);
17820
            SP_ASM_MUL_ADD(l, h, o2, mu, *md);
17821
            ad[j] = l;
17822
            o = h;
17823
        }
17824
        /* Handle overflow. */
17825
        SP_ASM_ADDC(o, o2, a->dp[m->used * 2 - 1]);
17826
        a->dp[m->used * 2 - 1] = o;
17827
        a->dp[m->used * 2] = o2;
17828
        a->used = (sp_size_t)(m->used * 2 + 1);
17829
    }
17830
17831
    if (!ct) {
17832
        /* Remove leading zeros. */
17833
        sp_clamp(a);
17834
        (void)sp_rshb(a, bits, a);
17835
        /* a = a mod m */
17836
        if (_sp_cmp_abs(a, m) != MP_LT) {
17837
            _sp_sub_off(a, m, a, 0);
17838
        }
17839
    }
17840
    else {
17841
        (void)sp_rshb(a, bits, a);
17842
        /* Constant time clamping. */
17843
        sp_clamp_ct(a);
17844
17845
        _sp_submod_ct(a, m, m, m->used + 1U, a);
17846
    }
17847
17848
#if 0
17849
    sp_print(a, "rr");
17850
#endif
17851
17852
    return MP_OKAY;
17853
#endif /* !SQR_MUL_ASM */
17854
205M
}
17855
17856
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || \
17857
    (defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC))
17858
/* Reduce a number in Montgomery form.
17859
 *
17860
 * @param  [in,out]  a   SP integer to Montgomery reduce.
17861
 * @param  [in]      m   SP integer that is the modulus.
17862
 * @param  [in]      mp  SP integer digit that is the bottom digit of inv(-m).
17863
 * @param  [in]      ct  Indicates operation must be constant time.
17864
 *
17865
 * @return  MP_OKAY on success.
17866
 * @return  MP_VAL when a or m is NULL or m is zero.
17867
 */
17868
int sp_mont_red_ex(sp_int* a, const sp_int* m, sp_int_digit mp, int ct)
17869
127M
{
17870
127M
    int err;
17871
17872
    /* Validate parameters. */
17873
127M
    if ((a == NULL) || (m == NULL) || sp_iszero(m)) {
17874
0
        err = MP_VAL;
17875
0
    }
17876
127M
#ifdef WOLFSSL_SP_INT_NEGATIVE
17877
127M
    else if ((a->sign == MP_NEG) || (m->sign == MP_NEG)) {
17878
139
        err = MP_VAL;
17879
139
    }
17880
127M
#endif
17881
    /* Ensure a has enough space for calculation. */
17882
127M
    else if (a->size < m->used * 2 + 1) {
17883
9
        err = MP_VAL;
17884
9
    }
17885
127M
    else {
17886
        /* Perform Montogomery Reduction. */
17887
127M
        err = _sp_mont_red(a, m, mp, ct);
17888
127M
    }
17889
17890
127M
    return err;
17891
127M
}
17892
#endif
17893
17894
/* Calculate the bottom digit of the inverse of negative m.
17895
 * (rho * m) mod 2^n = -1, where n is the number of bits in a digit.
17896
 *
17897
 * Used when performing Montgomery Reduction.
17898
 * m must be odd.
17899
 * Jeffrey Hurchalla's method.
17900
 *   https://arxiv.org/pdf/2204.04342.pdf
17901
 *
17902
 * @param  [in]   m   SP integer that is the modulus.
17903
 * @param  [out]  mp  SP integer digit that is the bottom digit of inv(-m).
17904
 */
17905
static void _sp_mont_setup(const sp_int* m, sp_int_digit* rho)
17906
834k
{
17907
834k
    sp_int_digit d = m->dp[0];
17908
834k
    sp_int_digit x = (3 * d) ^ 2;
17909
834k
    sp_int_digit y = 1 - d * x;
17910
17911
#if SP_WORD_SIZE >= 16
17912
    x *= 1 + y; y *= y;
17913
#endif
17914
#if SP_WORD_SIZE >= 32
17915
    x *= 1 + y; y *= y;
17916
#endif
17917
#if SP_WORD_SIZE >= 64
17918
    x *= 1 + y; y *= y;
17919
#endif
17920
834k
    x *= 1 + y;
17921
17922
    /* rho = -1/m mod d, subtract x (unsigned) from 0, assign negative */
17923
834k
    *rho = (sp_int_digit)((sp_int_sdigit)0 - (sp_int_sdigit)x);
17924
834k
}
17925
17926
/* Calculate the bottom digit of the inverse of negative m.
17927
 * (rho * m) mod 2^n = -1, where n is the number of bits in a digit.
17928
 *
17929
 * Used when performing Montgomery Reduction.
17930
 *
17931
 * @param  [in]   m   SP integer that is the modulus.
17932
 * @param  [out]  mp  SP integer digit that is the bottom digit of inv(-m).
17933
 *
17934
 * @return  MP_OKAY on success.
17935
 * @return  MP_VAL when m or rho is NULL.
17936
 */
17937
int sp_mont_setup(const sp_int* m, sp_int_digit* rho)
17938
63.2k
{
17939
63.2k
    int err = MP_OKAY;
17940
17941
    /* Validate parameters. */
17942
63.2k
    if ((m == NULL) || (rho == NULL)) {
17943
0
        err = MP_VAL;
17944
0
    }
17945
    /* Calculation only works with odd modulus. */
17946
63.2k
    if ((err == MP_OKAY) && !sp_isodd(m)) {
17947
50
        err = MP_VAL;
17948
50
    }
17949
17950
63.2k
    if (err == MP_OKAY) {
17951
        /* Calculate negative of inverse mod 2^n. */
17952
63.2k
        _sp_mont_setup(m, rho);
17953
63.2k
    }
17954
17955
63.2k
    return err;
17956
63.2k
}
17957
17958
/* Calculate the normalization value of m.
17959
 *   norm = 2^k - m, where k is the number of bits in m
17960
 *
17961
 * @param  [out]  norm   SP integer that normalises numbers into Montgomery
17962
 *                       form.
17963
 * @param  [in]   m      SP integer that is the modulus.
17964
 *
17965
 * @return  MP_OKAY on success.
17966
 * @return  MP_VAL when norm or m is NULL, or number of bits in m is maximual.
17967
 */
17968
int sp_mont_norm(sp_int* norm, const sp_int* m)
17969
848k
{
17970
848k
    int err = MP_OKAY;
17971
848k
    unsigned int bits = 0;
17972
17973
    /* Validate parameters. */
17974
848k
    if ((norm == NULL) || (m == NULL)) {
17975
0
        err = MP_VAL;
17976
0
    }
17977
848k
    if (err == MP_OKAY) {
17978
        /* Find top bit and ensure norm has enough space. */
17979
848k
        bits = (unsigned int)sp_count_bits(m);
17980
        /* NOLINTBEGIN(clang-analyzer-core.UndefinedBinaryOperatorResult) */
17981
        /* clang-tidy falsely believes that norm->size was corrupted by the
17982
         * _sp_copy() to "Set real working value to base." in _sp_exptmod_ex().
17983
         */
17984
848k
        if (bits >= (unsigned int)norm->size * SP_WORD_SIZE) {
17985
72
            err = MP_VAL;
17986
72
        }
17987
        /* NOLINTEND(clang-analyzer-core.UndefinedBinaryOperatorResult) */
17988
848k
    }
17989
848k
    if (err == MP_OKAY) {
17990
        /* Round up for case when m is less than a word - no advantage in using
17991
         * a smaller mask and would take more operations.
17992
         */
17993
848k
        if (bits < SP_WORD_SIZE) {
17994
531
            bits = SP_WORD_SIZE;
17995
531
        }
17996
        /* Smallest number greater than m of form 2^n. */
17997
848k
        _sp_zero(norm);
17998
848k
        err = sp_set_bit(norm, (int)bits);
17999
848k
    }
18000
848k
    if (err == MP_OKAY) {
18001
        /* norm = 2^n % m */
18002
848k
        err = sp_sub(norm, m, norm);
18003
848k
    }
18004
848k
    if ((err == MP_OKAY) && (bits == SP_WORD_SIZE)) {
18005
        /* Sub made norm one word and now finish calculation. */
18006
730
        norm->dp[0] %= m->dp[0];
18007
730
    }
18008
848k
    if (err == MP_OKAY) {
18009
        /* Remove leading zeros. */
18010
848k
        sp_clamp(norm);
18011
848k
    }
18012
18013
848k
    return err;
18014
848k
}
18015
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH ||
18016
        * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
18017
18018
/*********************************
18019
 * To and from binary and strings.
18020
 *********************************/
18021
18022
/* Calculate the number of 8-bit values required to represent the
18023
 * multi-precision number.
18024
 *
18025
 * When a is NULL, return s 0.
18026
 *
18027
 * @param  [in]  a  SP integer.
18028
 *
18029
 * @return  The count of 8-bit values.
18030
 * @return  0 when a is NULL.
18031
 */
18032
int sp_unsigned_bin_size(const sp_int* a)
18033
233k
{
18034
233k
    int cnt = 0;
18035
18036
233k
    if (a != NULL) {
18037
233k
        cnt = (sp_count_bits(a) + 7) >> 3;
18038
233k
    }
18039
18040
233k
    return cnt;
18041
233k
}
18042
18043
/* Convert a number as an array of bytes in big-endian format to a
18044
 * multi-precision number.
18045
 *
18046
 * @param  [out]  a     SP integer.
18047
 * @param  [in]   in    Array of bytes.
18048
 * @param  [in]   inSz  Number of data bytes in array.
18049
 *
18050
 * @return  MP_OKAY on success.
18051
 * @return  MP_VAL when the number is too big to fit in an SP.
18052
 */
18053
int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz)
18054
14.1k
{
18055
14.1k
    int err = MP_OKAY;
18056
18057
    /* Validate parameters. */
18058
14.1k
    if ((a == NULL) || ((in == NULL) && (inSz > 0))) {
18059
0
        err = MP_VAL;
18060
0
    }
18061
18062
    /* Check a has enough space for number. */
18063
14.1k
    if ((err == MP_OKAY) && (inSz > (word32)a->size * SP_WORD_SIZEOF)) {
18064
2.40k
        err = MP_VAL;
18065
2.40k
    }
18066
18067
14.1k
    if (err == MP_OKAY) {
18068
        /* Load full digits at a time from in. */
18069
11.7k
        int i;
18070
11.7k
        int j = 0;
18071
18072
11.7k
        a->used = (sp_size_t)((inSz + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF);
18073
18074
    #if defined(BIG_ENDIAN_ORDER) && !defined(WOLFSSL_SP_INT_DIGIT_ALIGN)
18075
        /* Data endian matches representation of number.
18076
         * Directly copy if we don't have alignment issues.
18077
         */
18078
        for (i = (int)(inSz-1); i > SP_WORD_SIZEOF-1; i -= SP_WORD_SIZEOF) {
18079
            a->dp[j++] = *(sp_int_digit*)(in + i - (SP_WORD_SIZEOF - 1));
18080
        }
18081
    #else
18082
        /* Construct digit from required number of bytes. */
18083
345k
        for (i = (int)(inSz-1); i >= SP_WORD_SIZEOF - 1; i -= SP_WORD_SIZEOF) {
18084
333k
            a->dp[j]  = ((sp_int_digit)in[i - 0] <<  0)
18085
        #if SP_WORD_SIZE >= 16
18086
                      | ((sp_int_digit)in[i - 1] <<  8)
18087
        #endif
18088
        #if SP_WORD_SIZE >= 32
18089
                      | ((sp_int_digit)in[i - 2] << 16) |
18090
                        ((sp_int_digit)in[i - 3] << 24)
18091
        #endif
18092
        #if SP_WORD_SIZE >= 64
18093
                      | ((sp_int_digit)in[i - 4] << 32) |
18094
                        ((sp_int_digit)in[i - 5] << 40) |
18095
                        ((sp_int_digit)in[i - 6] << 48) |
18096
                        ((sp_int_digit)in[i - 7] << 56)
18097
        #endif
18098
333k
                                                       ;
18099
333k
            j++;
18100
333k
        }
18101
11.7k
    #endif
18102
18103
#if SP_WORD_SIZE >= 16
18104
        /* Handle leftovers. */
18105
        if (i >= 0) {
18106
    #ifdef BIG_ENDIAN_ORDER
18107
            int s;
18108
18109
            /* Place remaining bytes into last digit. */
18110
            a->dp[a->used - 1] = 0;
18111
            for (s = 0; i >= 0; i--,s += 8) {
18112
                a->dp[j] |= ((sp_int_digit)in[i]) << s;
18113
            }
18114
    #else
18115
            /* Cast digits to an array of bytes so we can insert directly. */
18116
            byte *d = (byte*)a->dp;
18117
18118
            /* Zero out all bytes in last digit. */
18119
            a->dp[a->used - 1] = 0;
18120
            /* Place remaining bytes directly into digit. */
18121
            switch (i) {
18122
            #if SP_WORD_SIZE >= 64
18123
                case 6: d[inSz - 1 - 6] = in[6]; FALL_THROUGH;
18124
                case 5: d[inSz - 1 - 5] = in[5]; FALL_THROUGH;
18125
                case 4: d[inSz - 1 - 4] = in[4]; FALL_THROUGH;
18126
                case 3: d[inSz - 1 - 3] = in[3]; FALL_THROUGH;
18127
            #endif
18128
            #if SP_WORD_SIZE >= 32
18129
                case 2: d[inSz - 1 - 2] = in[2]; FALL_THROUGH;
18130
                case 1: d[inSz - 1 - 1] = in[1]; FALL_THROUGH;
18131
            #endif
18132
                case 0: d[inSz - 1 - 0] = in[0];
18133
            }
18134
    #endif /* LITTLE_ENDIAN_ORDER */
18135
        }
18136
#endif
18137
11.7k
        sp_clamp_ct(a);
18138
11.7k
    }
18139
18140
14.1k
    return err;
18141
14.1k
}
18142
18143
/* Convert the multi-precision number to an array of bytes in big-endian format.
18144
 *
18145
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
18146
 * to calculate the number of bytes required.
18147
 *
18148
 * @param  [in]   a    SP integer.
18149
 * @param  [out]  out  Array to put encoding into.
18150
 *
18151
 * @return  MP_OKAY on success.
18152
 * @return  MP_VAL when a or out is NULL.
18153
 */
18154
int sp_to_unsigned_bin(const sp_int* a, byte* out)
18155
53.1k
{
18156
    /* Write assuming output buffer is big enough. */
18157
53.1k
    return sp_to_unsigned_bin_len(a, out, sp_unsigned_bin_size(a));
18158
53.1k
}
18159
18160
/* Convert the multi-precision number to an array of bytes in big-endian format.
18161
 *
18162
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
18163
 * to calculate the number of bytes required.
18164
 * Front-pads the output array with zeros to make number the size of the array.
18165
 *
18166
 * @param  [in]   a      SP integer.
18167
 * @param  [out]  out    Array to put encoding into.
18168
 * @param  [in]   outSz  Size of the array in bytes.
18169
 *
18170
 * @return  MP_OKAY on success.
18171
 * @return  MP_VAL when a or out is NULL.
18172
 */
18173
int sp_to_unsigned_bin_len(const sp_int* a, byte* out, int outSz)
18174
12.6k
{
18175
12.6k
    int err = MP_OKAY;
18176
18177
    /* Validate parameters. */
18178
12.6k
    if ((a == NULL) || (out == NULL) || (outSz < 0)) {
18179
972
        err = MP_VAL;
18180
972
    }
18181
18182
#if SP_WORD_SIZE > 8
18183
    if (err == MP_OKAY) {
18184
        /* Start at the end of the buffer - least significant byte. */
18185
        int j = outSz - 1;
18186
18187
        if (!sp_iszero(a)) {
18188
            unsigned int i;
18189
18190
            /* Put each digit in. */
18191
            for (i = 0; (j >= 0) && (i < a->used); i++) {
18192
                int b;
18193
                sp_int_digit d = a->dp[i];
18194
                /* Place each byte of a digit into the buffer. */
18195
                for (b = 0; b < SP_WORD_SIZE; b += 8) {
18196
                    out[j--] = (byte)d;
18197
                    d >>= 8;
18198
                    /* Stop if the output buffer is filled. */
18199
                    if (j < 0) {
18200
                        if ((i < (unsigned int)a->used - 1) || (d > 0)) {
18201
                            err = MP_VAL;
18202
                        }
18203
                        break;
18204
                    }
18205
                }
18206
            }
18207
        }
18208
        /* Front pad buffer with 0s. */
18209
        for (; j >= 0; j--) {
18210
            out[j] = 0;
18211
        }
18212
    }
18213
#else
18214
12.6k
    if ((err == MP_OKAY) && ((unsigned int)outSz < a->used)) {
18215
16
        err = MP_VAL;
18216
16
    }
18217
12.6k
    if (err == MP_OKAY) {
18218
11.6k
        unsigned int i;
18219
11.6k
        int j;
18220
18221
11.6k
        XMEMSET(out, 0, (unsigned int)outSz - a->used);
18222
18223
210k
        for (i = 0, j = outSz - 1; i < a->used; i++, j--) {
18224
199k
            out[j] = a->dp[i];
18225
199k
        }
18226
11.6k
    }
18227
12.6k
#endif
18228
18229
12.6k
    return err;
18230
12.6k
}
18231
18232
/* Convert the multi-precision number to an array of bytes in big-endian format.
18233
 *
18234
 * Constant-time implementation.
18235
 *
18236
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
18237
 * to calculate the number of bytes required.
18238
 * Front-pads the output array with zeros to make number the size of the array.
18239
 *
18240
 * @param  [in]   a      SP integer.
18241
 * @param  [out]  out    Array to put encoding into.
18242
 * @param  [in]   outSz  Size of the array in bytes.
18243
 *
18244
 * @return  MP_OKAY on success.
18245
 * @return  MP_VAL when a or out is NULL.
18246
 */
18247
int sp_to_unsigned_bin_len_ct(const sp_int* a, byte* out, int outSz)
18248
0
{
18249
0
    int err = MP_OKAY;
18250
18251
    /* Validate parameters. */
18252
0
    if ((a == NULL) || (out == NULL) || (outSz < 0)) {
18253
0
        err = MP_VAL;
18254
0
    }
18255
18256
#if SP_WORD_SIZE > 8
18257
    if (err == MP_OKAY) {
18258
        /* Start at the end of the buffer - least significant byte. */
18259
        int j;
18260
        unsigned int i;
18261
        volatile sp_int_digit mask = (sp_int_digit)-1;
18262
        sp_int_digit d;
18263
18264
        /* Put each digit in. */
18265
        i = 0;
18266
        for (j = outSz - 1; j >= 0; ) {
18267
            unsigned int b;
18268
            volatile unsigned int notFull = (i < (unsigned int)a->used - 1);
18269
18270
            d = a->dp[i];
18271
            /* Place each byte of a digit into the buffer. */
18272
            for (b = 0; (j >= 0) && (b < SP_WORD_SIZEOF); b++) {
18273
                out[j--] = (byte)(d & mask);
18274
                d >>= 8;
18275
            }
18276
            mask &= (sp_int_digit)(-(int)notFull);
18277
            i += (unsigned int)(1 & mask);
18278
        }
18279
    }
18280
#else
18281
0
    if ((err == MP_OKAY) && ((unsigned int)outSz < a->used)) {
18282
0
        err = MP_VAL;
18283
0
    }
18284
0
    if (err == MP_OKAY) {
18285
0
        unsigned int i;
18286
0
        int j;
18287
0
        volatile sp_int_digit mask = (sp_int_digit)-1;
18288
18289
0
        i = 0;
18290
0
        for (j = outSz - 1; j >= 0; j--) {
18291
0
            out[j] = a->dp[i] & mask;
18292
0
            mask &= (sp_int_digit)0 - (i < (unsigned int)a->used - 1);
18293
0
            i += (unsigned int)(1 & mask);
18294
0
        }
18295
0
    }
18296
0
#endif
18297
18298
0
    return err;
18299
0
}
18300
18301
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
18302
    !defined(WOLFSSL_RSA_VERIFY_ONLY)
18303
/* Store the number in big-endian format in array at an offset.
18304
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
18305
 * to calculate the number of bytes required.
18306
 *
18307
 * @param  [in]   o    Offset into array o start encoding.
18308
 * @param  [in]   a    SP integer.
18309
 * @param  [out]  out  Array to put encoding into.
18310
 *
18311
 * @return  Index of next byte after data.
18312
 * @return  MP_VAL when a or out is NULL.
18313
 */
18314
int sp_to_unsigned_bin_at_pos(int o, const sp_int* a, unsigned char* out)
18315
0
{
18316
    /* Get length of data that will be written. */
18317
0
    int len = sp_unsigned_bin_size(a);
18318
    /* Write number to buffer at offset. */
18319
0
    int ret = sp_to_unsigned_bin_len(a, out + o, len);
18320
18321
0
    if (ret == MP_OKAY) {
18322
        /* Return offset of next byte after number. */
18323
0
        ret = o + len;
18324
0
    }
18325
18326
0
    return ret;
18327
0
}
18328
#endif /* WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY */
18329
18330
#ifdef WOLFSSL_SP_READ_RADIX_16
18331
/* Convert hexadecimal number as string in big-endian format to a
18332
 * multi-precision number.
18333
 *
18334
 * Assumes negative sign and leading zeros have been stripped.
18335
 *
18336
 * @param  [out]  a   SP integer.
18337
 * @param  [in]   in  NUL terminated string.
18338
 *
18339
 * @return  MP_OKAY on success.
18340
 * @return  MP_VAL when radix not supported, value is negative, or a character
18341
 *          is not valid.
18342
 */
18343
static int _sp_read_radix_16(sp_int* a, const char* in)
18344
381k
{
18345
381k
    int err = MP_OKAY;
18346
381k
    int i;
18347
381k
    unsigned int s = 0;
18348
381k
    sp_size_t j = 0;
18349
381k
    sp_int_digit d;
18350
    /* Skip whitespace at end of line */
18351
381k
    int eol_done = 0;
18352
18353
    /* Make all nibbles in digit 0. */
18354
381k
    d = 0;
18355
    /* Step through string a character at a time starting at end - least
18356
     * significant byte. */
18357
23.7M
    for (i = (int)(XSTRLEN(in) - 1); i >= 0; i--) {
18358
23.3M
        volatile char c = in[i];
18359
        /* Convert character from hex. */
18360
23.3M
        int ch = (int)HexCharToByte(c);
18361
        /* Check for invalid character. */
18362
23.3M
        if (ch < 0) {
18363
1.14k
            if (!eol_done && CharIsWhiteSpace(c))
18364
218
                continue;
18365
925
            err = MP_VAL;
18366
925
            break;
18367
1.14k
        }
18368
23.3M
        eol_done = 1;
18369
18370
        /* Check whether we have filled the digit. */
18371
23.3M
        if (s == SP_WORD_SIZE) {
18372
            /* Store digit and move index to next in a. */
18373
2.00M
            a->dp[j++] = d;
18374
            /* Fail if we are out of space in a. */
18375
2.00M
            if (j >= a->size) {
18376
122
                err = MP_VAL;
18377
122
                break;
18378
122
            }
18379
            /* Set shift back to 0 - lowest nibble. */
18380
2.00M
            s = 0;
18381
            /* Make all nibbles in digit 0. */
18382
2.00M
            d = 0;
18383
2.00M
        }
18384
18385
        /* Put next nibble into digit. */
18386
23.3M
        d |= ((sp_int_digit)ch) << s;
18387
        /* Update shift for next nibble. */
18388
23.3M
        s += 4;
18389
23.3M
    }
18390
18391
381k
    if (err == MP_OKAY) {
18392
        /* If space, store last digit. */
18393
380k
        if (j < a->size) {
18394
380k
            a->dp[j] = d;
18395
380k
        }
18396
        /* Update used count. */
18397
380k
        a->used = (sp_size_t)(j + 1U);
18398
        /* Remove leading zeros. */
18399
380k
        sp_clamp(a);
18400
380k
    }
18401
18402
381k
    return err;
18403
381k
}
18404
#endif /* WOLFSSL_SP_READ_RADIX_16 */
18405
18406
#ifdef WOLFSSL_SP_READ_RADIX_10
18407
/* Convert decimal number as string in big-endian format to a multi-precision
18408
 * number.
18409
 *
18410
 * Assumes negative sign and leading zeros have been stripped.
18411
 *
18412
 * @param  [out]  a   SP integer.
18413
 * @param  [in]   in  NUL terminated string.
18414
 *
18415
 * @return  MP_OKAY on success.
18416
 * @return  MP_VAL when radix not supported, value is negative, or a character
18417
 *          is not valid.
18418
 */
18419
static int _sp_read_radix_10(sp_int* a, const char* in)
18420
151k
{
18421
151k
    int  err = MP_OKAY;
18422
151k
    int  i;
18423
18424
    /* Start with a being zero. */
18425
151k
    _sp_zero(a);
18426
18427
    /* Process all characters. */
18428
6.21M
    for (i = 0; in[i] != '\0'; i++) {
18429
        /* Get character. */
18430
6.06M
        volatile char ch = in[i];
18431
        /* Check character is valid. */
18432
6.06M
        if ((ch >= '0') && (ch <= '9')) {
18433
            /* Assume '0'..'9' are continuous values as characters. */
18434
6.06M
            ch = (char)(ch - '0');
18435
6.06M
        }
18436
0
        else {
18437
0
            if (CharIsWhiteSpace(ch))
18438
0
                continue;
18439
            /* Return error on invalid character. */
18440
0
            err = MP_VAL;
18441
0
            break;
18442
0
        }
18443
18444
        /* Multiply a by 10. */
18445
6.06M
        err = _sp_mul_d(a, 10, a, 0);
18446
6.06M
        if (err != MP_OKAY) {
18447
88
            break;
18448
88
        }
18449
        /* Add character value. */
18450
6.06M
        err = _sp_add_d(a, (sp_int_digit)ch, a);
18451
6.06M
        if (err != MP_OKAY) {
18452
0
            break;
18453
0
        }
18454
6.06M
    }
18455
18456
151k
    return err;
18457
151k
}
18458
#endif /* WOLFSSL_SP_READ_RADIX_10 */
18459
18460
#if defined(WOLFSSL_SP_READ_RADIX_16) || defined(WOLFSSL_SP_READ_RADIX_10)
18461
/* Convert a number as string in big-endian format to a big number.
18462
 * Only supports base-16 (hexadecimal) and base-10 (decimal).
18463
 *
18464
 * Negative values supported when WOLFSSL_SP_INT_NEGATIVE is defined.
18465
 *
18466
 * @param  [out]  a      SP integer.
18467
 * @param  [in]   in     NUL terminated string.
18468
 * @param  [in]   radix  Number of values in a digit.
18469
 *
18470
 * @return  MP_OKAY on success.
18471
 * @return  MP_VAL when a or in is NULL, radix not supported, value is negative,
18472
 *          or a character is not valid.
18473
 */
18474
int sp_read_radix(sp_int* a, const char* in, int radix)
18475
314k
{
18476
314k
    int err = MP_OKAY;
18477
314k
#ifdef WOLFSSL_SP_INT_NEGATIVE
18478
314k
    sp_uint8 sign = MP_ZPOS;
18479
314k
#endif
18480
18481
314k
    if ((a == NULL) || (in == NULL)) {
18482
0
        err = MP_VAL;
18483
0
    }
18484
18485
314k
    if (err == MP_OKAY) {
18486
    #ifndef WOLFSSL_SP_INT_NEGATIVE
18487
        if (*in == '-') {
18488
            err = MP_VAL;
18489
        }
18490
        else
18491
    #endif
18492
314k
        {
18493
314k
        #ifdef WOLFSSL_SP_INT_NEGATIVE
18494
314k
            if (*in == '-') {
18495
                /* Make number negative if signed string. */
18496
8.03k
                sign = MP_NEG;
18497
8.03k
                in++;
18498
8.03k
            }
18499
314k
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
18500
            /* Skip leading zeros. */
18501
1.25M
            while (*in == '0') {
18502
941k
                in++;
18503
941k
            }
18504
18505
314k
            if (radix == 16) {
18506
213k
                err = _sp_read_radix_16(a, in);
18507
213k
            }
18508
100k
        #ifdef WOLFSSL_SP_READ_RADIX_10
18509
100k
            else if (radix == 10) {
18510
100k
                err = _sp_read_radix_10(a, in);
18511
100k
            }
18512
0
        #endif
18513
0
            else {
18514
0
                err = MP_VAL;
18515
0
            }
18516
18517
314k
        #ifdef WOLFSSL_SP_INT_NEGATIVE
18518
            /* Ensure not negative when zero. */
18519
314k
            if (err == MP_OKAY) {
18520
313k
                if (sp_iszero(a)) {
18521
79.7k
                    a->sign = MP_ZPOS;
18522
79.7k
                }
18523
233k
                else {
18524
233k
                    a->sign = sign;
18525
233k
                }
18526
313k
            }
18527
314k
        #endif
18528
314k
        }
18529
314k
    }
18530
18531
314k
    return err;
18532
314k
}
18533
#endif /* WOLFSSL_SP_READ_RADIX_16 || WOLFSSL_SP_READ_RADIX_10 */
18534
18535
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
18536
    defined(WC_MP_TO_RADIX)
18537
/* Put the big-endian, hex string encoding of a into str.
18538
 *
18539
 * Assumes str is large enough for result.
18540
 * Use sp_radix_size() to calculate required length.
18541
 *
18542
 * @param  [in]   a    SP integer to convert.
18543
 * @param  [out]  str  String to hold hex string result.
18544
 *
18545
 * @return  MP_OKAY on success.
18546
 * @return  MP_VAL when a or str is NULL.
18547
 */
18548
int sp_tohex(const sp_int* a, char* str)
18549
15.2k
{
18550
15.2k
    int err = MP_OKAY;
18551
18552
    /* Validate parameters. */
18553
15.2k
    if ((a == NULL) || (str == NULL)) {
18554
0
        err = MP_VAL;
18555
0
    }
18556
18557
15.2k
    if (err == MP_OKAY) {
18558
        /* Quick out if number is zero. */
18559
15.2k
        if (sp_iszero(a) == MP_YES) {
18560
6.23k
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
18561
            /* Make string represent complete bytes. */
18562
6.23k
            *str++ = '0';
18563
6.23k
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
18564
6.23k
            *str++ = '0';
18565
6.23k
        }
18566
9.05k
        else {
18567
9.05k
            int i;
18568
9.05k
            int j;
18569
9.05k
            sp_int_digit d;
18570
18571
9.05k
        #ifdef WOLFSSL_SP_INT_NEGATIVE
18572
9.05k
            if (a->sign == MP_NEG) {
18573
                /* Add negative sign character. */
18574
342
                *str = '-';
18575
342
                str++;
18576
342
            }
18577
9.05k
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
18578
18579
            /* Start at last digit - most significant digit. */
18580
9.05k
            i = (int)(a->used - 1);
18581
9.05k
            d = a->dp[i];
18582
9.05k
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
18583
            /* Find highest non-zero byte in most-significant word. */
18584
48.2k
            for (j = SP_WORD_SIZE - 8; j >= 0 && i >= 0; j -= 8) {
18585
                /* When a byte at this index is not 0 break out to start
18586
                 * writing.
18587
                 */
18588
48.2k
                if (((d >> j) & 0xff) != 0) {
18589
9.05k
                    break;
18590
9.05k
                }
18591
                /* Skip this digit if it was 0. */
18592
39.2k
                if (j == 0) {
18593
0
                    j = SP_WORD_SIZE - 8;
18594
0
                    d = a->dp[--i];
18595
0
                }
18596
39.2k
            }
18597
            /* Start with high nibble of byte. */
18598
9.05k
            j += 4;
18599
        #else
18600
            /* Find highest non-zero nibble in most-significant word. */
18601
            for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
18602
                /* When a nibble at this index is not 0 break out to start
18603
                 * writing.
18604
                 */
18605
                if (((d >> j) & 0xf) != 0) {
18606
                    break;
18607
                }
18608
                /* Skip this digit if it was 0. */
18609
                if (j == 0) {
18610
                    j = SP_WORD_SIZE - 4;
18611
                    d = a->dp[--i];
18612
                }
18613
            }
18614
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
18615
            /* Write out as much as required from most-significant digit. */
18616
66.4k
            for (; j >= 0; j -= 4) {
18617
57.4k
                *(str++) = ByteToHex((byte)(d >> j));
18618
57.4k
            }
18619
            /* Write rest of digits. */
18620
64.3k
            for (--i; i >= 0; i--) {
18621
                /* Get digit from memory. */
18622
55.3k
                d = a->dp[i];
18623
                /* Write out all nibbles of digit. */
18624
474k
                for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
18625
419k
                    *(str++) = (char)ByteToHex((byte)(d >> j));
18626
419k
                }
18627
55.3k
            }
18628
9.05k
        }
18629
        /* Terminate string. */
18630
15.2k
        *str = '\0';
18631
15.2k
    }
18632
18633
15.2k
    return err;
18634
15.2k
}
18635
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
18636
18637
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
18638
    defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
18639
    defined(WC_MP_TO_RADIX)
18640
/* Put the big-endian, decimal string encoding of a into str.
18641
 *
18642
 * Assumes str is large enough for result.
18643
 * Use sp_radix_size() to calculate required length.
18644
 *
18645
 * @param  [in]   a    SP integer to convert.
18646
 * @param  [out]  str  String to hold decimal string result.
18647
 *
18648
 * @return  MP_OKAY on success.
18649
 * @return  MP_VAL when a or str is NULL.
18650
 * @return  MP_MEM when dynamic memory allocation fails.
18651
 */
18652
int sp_todecimal(const sp_int* a, char* str)
18653
24.3k
{
18654
24.3k
    int err = MP_OKAY;
18655
24.3k
    int i;
18656
24.3k
    int j;
18657
24.3k
    sp_int_digit d = 0;
18658
18659
    /* Validate parameters. */
18660
24.3k
    if ((a == NULL) || (str == NULL)) {
18661
0
        err = MP_VAL;
18662
0
    }
18663
    /* Quick out if number is zero. */
18664
24.3k
    else if (sp_iszero(a) == MP_YES) {
18665
2.64k
        *str++ = '0';
18666
2.64k
        *str = '\0';
18667
2.64k
    }
18668
21.7k
    else if (a->used >= SP_INT_DIGITS) {
18669
10
        err = MP_VAL;
18670
10
    }
18671
21.7k
    else {
18672
        /* Temporary that is divided by 10. */
18673
21.7k
        DECL_SP_INT(t, a->used + 1);
18674
18675
21.7k
        ALLOC_SP_INT_SIZE(t, a->used + 1, err, NULL);
18676
21.7k
        if (err == MP_OKAY) {
18677
21.4k
            _sp_copy(a, t);
18678
21.4k
        }
18679
21.7k
        if (err == MP_OKAY) {
18680
21.4k
        #ifdef WOLFSSL_SP_INT_NEGATIVE
18681
21.4k
            if (a->sign == MP_NEG) {
18682
                /* Add negative sign character. */
18683
395
                *str = '-';
18684
395
                str++;
18685
395
            }
18686
21.4k
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
18687
18688
            /* Write out little endian. */
18689
21.4k
            i = 0;
18690
1.80M
            do {
18691
                /* Divide by 10 and get remainder of division. */
18692
1.80M
                (void)sp_div_d(t, 10, t, &d);
18693
                /* Write out remainder as a character. */
18694
1.80M
                str[i++] = (char)('0' + d);
18695
1.80M
            }
18696
            /* Keep going while there is a value to write. */
18697
1.80M
            while (!sp_iszero(t));
18698
            /* Terminate string. */
18699
21.4k
            str[i] = '\0';
18700
18701
21.4k
            if (err == MP_OKAY) {
18702
                /* Reverse string to big endian. */
18703
928k
                for (j = 0; j <= (i - 1) / 2; j++) {
18704
906k
                    int c = (unsigned char)str[j];
18705
906k
                    str[j] = str[i - 1 - j];
18706
906k
                    str[i - 1 - j] = (char)c;
18707
906k
                }
18708
21.4k
            }
18709
21.4k
        }
18710
18711
21.7k
        FREE_SP_INT(t, NULL);
18712
21.7k
    }
18713
18714
24.3k
    return err;
18715
24.3k
}
18716
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
18717
18718
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
18719
    defined(WC_MP_TO_RADIX)
18720
/* Put the string version, big-endian, of a in str using the given radix.
18721
 *
18722
 * @param  [in]   a      SP integer to convert.
18723
 * @param  [out]  str    String to hold the encoded string result.
18724
 * @param  [in]   radix  Base to encode the number in.
18725
 *                       Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
18726
 *
18727
 * @return  MP_OKAY on success.
18728
 * @return  MP_VAL when a or str is NULL, or radix not supported.
18729
 */
18730
int sp_toradix(const sp_int* a, char* str, int radix)
18731
49.4k
{
18732
49.4k
    int err = MP_OKAY;
18733
18734
    /* Validate parameters. */
18735
49.4k
    if ((a == NULL) || (str == NULL)) {
18736
0
        err = MP_VAL;
18737
0
    }
18738
    /* Handle base 16 if requested. */
18739
49.4k
    else if (radix == MP_RADIX_HEX) {
18740
13.5k
        err = sp_tohex(a, str);
18741
13.5k
    }
18742
35.9k
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
18743
35.9k
    defined(HAVE_COMP_KEY)
18744
    /* Handle base 10 if requested. */
18745
35.9k
    else if (radix == MP_RADIX_DEC) {
18746
35.9k
        err = sp_todecimal(a, str);
18747
35.9k
    }
18748
0
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
18749
0
    else {
18750
        /* Base not supported. */
18751
0
        err = MP_VAL;
18752
0
    }
18753
18754
49.4k
    return err;
18755
49.4k
}
18756
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
18757
18758
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
18759
    defined(WC_MP_TO_RADIX)
18760
/* Calculate the length of the string version, big-endian, of a using the given
18761
 * radix.
18762
 *
18763
 * @param  [in]   a      SP integer to convert.
18764
 * @param  [in]   radix  Base the number would be encoded in.
18765
 *                       Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
18766
 * @param  [out]  size   The number of characters in encoding.
18767
 *
18768
 * @return  MP_OKAY on success.
18769
 * @return  MP_VAL when a or size is NULL, or radix not supported.
18770
 */
18771
int sp_radix_size(const sp_int* a, int radix, int* size)
18772
41.4k
{
18773
41.4k
    int err = MP_OKAY;
18774
18775
    /* Validate parameters. */
18776
41.4k
    if ((a == NULL) || (size == NULL)) {
18777
0
        err = MP_VAL;
18778
0
    }
18779
    /* Handle base 16 if requested. */
18780
41.4k
    else if (radix == MP_RADIX_HEX) {
18781
2.12k
        if (a->used == 0) {
18782
286
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
18783
            /* 00 and '\0' */
18784
286
            *size = 2 + 1;
18785
        #else
18786
            /* Zero and '\0' */
18787
            *size = 1 + 1;
18788
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
18789
286
        }
18790
1.83k
        else {
18791
            /* Count of nibbles. */
18792
1.83k
            int cnt = (sp_count_bits(a) + 3) >> 2;
18793
1.83k
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
18794
            /* Must have even number of nibbles to have complete bytes. */
18795
1.83k
            if (cnt & 1) {
18796
743
                cnt++;
18797
743
            }
18798
1.83k
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
18799
1.83k
        #ifdef WOLFSSL_SP_INT_NEGATIVE
18800
            /* Add to count of characters for negative sign. */
18801
1.83k
            if (a->sign == MP_NEG) {
18802
342
                cnt++;
18803
342
            }
18804
1.83k
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
18805
            /* One more for \0 */
18806
1.83k
            *size = cnt + 1;
18807
1.83k
        }
18808
2.12k
    }
18809
39.2k
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
18810
39.2k
    defined(HAVE_COMP_KEY)
18811
    /* Handle base 10 if requested. */
18812
39.2k
    else if (radix == MP_RADIX_DEC) {
18813
24.8k
        int i;
18814
24.8k
        sp_int_digit d;
18815
18816
        /* Quick out if it is zero. */
18817
24.8k
        if (sp_iszero(a) == MP_YES) {
18818
            /* Zero and '\0' */
18819
2.64k
            *size = 1 + 1;
18820
2.64k
        }
18821
22.2k
        else {
18822
22.2k
            DECL_SP_INT(t, a->used);
18823
18824
            /* Temporary to be divided by 10. */
18825
22.2k
            ALLOC_SP_INT(t, a->used, err, NULL);
18826
22.2k
            if (err == MP_OKAY) {
18827
21.7k
                t->size = a->used;
18828
21.7k
                _sp_copy(a, t);
18829
21.7k
            }
18830
18831
22.2k
            if (err == MP_OKAY) {
18832
                /* Count number of times number can be divided by 10. */
18833
1.87M
                for (i = 0; !sp_iszero(t); i++) {
18834
1.85M
                    (void)sp_div_d(t, 10, t, &d);
18835
1.85M
                }
18836
21.7k
            #ifdef WOLFSSL_SP_INT_NEGATIVE
18837
                /* Add to count of characters for negative sign. */
18838
21.7k
                if (a->sign == MP_NEG) {
18839
435
                    i++;
18840
435
                }
18841
21.7k
            #endif /* WOLFSSL_SP_INT_NEGATIVE */
18842
                /* One more for \0 */
18843
21.7k
                *size = i + 1;
18844
21.7k
            }
18845
18846
22.2k
            FREE_SP_INT(t, NULL);
18847
22.2k
        }
18848
24.8k
    }
18849
14.4k
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
18850
14.4k
    else {
18851
        /* Base not supported. */
18852
14.4k
        err = MP_VAL;
18853
14.4k
    }
18854
18855
41.4k
    return err;
18856
41.4k
}
18857
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
18858
18859
/***************************************
18860
 * Prime number generation and checking.
18861
 ***************************************/
18862
18863
#if defined(WOLFSSL_KEY_GEN) && (!defined(NO_RSA) || !defined(NO_DH) || \
18864
    !defined(NO_DSA)) && !defined(WC_NO_RNG)
18865
#ifndef WOLFSSL_SP_MILLER_RABIN_CNT
18866
/* By default, do 8 iterations of Miller-Rabin on check of primality when
18867
 * generating.
18868
 */
18869
34.1k
#define WOLFSSL_SP_MILLER_RABIN_CNT     8
18870
#endif
18871
18872
/* Generate a random prime for RSA only.
18873
 *
18874
 * @param  [out]  r     SP integer to hold result.
18875
 * @param  [in]   len   Number of bytes in prime. Use a negative value to
18876
 *                      indicate the two lowest bits must be set.
18877
 * @param  [in]   rng   Random number generator.
18878
 * @param  [in]   heap  Heap hint. Unused.
18879
 *
18880
 * @return  MP_OKAY on success
18881
 * @return  MP_VAL when r or rng is NULL, length is not supported or random
18882
 *          number generator fails.
18883
 */
18884
int sp_rand_prime(sp_int* r, int len, WC_RNG* rng, void* heap)
18885
986
{
18886
986
    static const byte USE_BBS = 3;
18887
986
    int  err = MP_OKAY;
18888
986
    byte low_bits = 1;
18889
986
    int  isPrime = MP_NO;
18890
986
#if defined(WOLFSSL_SP_MATH_ALL) || defined(BIG_ENDIAN_ORDER)
18891
986
    int  bits = 0;
18892
986
#endif /* WOLFSSL_SP_MATH_ALL || BIG_ENDIAN_ORDER */
18893
986
    unsigned int digits = 0;
18894
18895
986
    (void)heap;
18896
18897
    /* Check for NULL parameters. 0 is not prime, so 0 bytes is invalid. */
18898
986
    if ((r == NULL) || (rng == NULL) || (len == 0)) {
18899
32
        err = MP_VAL;
18900
32
    }
18901
18902
986
    if (err == MP_OKAY) {
18903
        /* Get type. */
18904
954
        if (len < 0) {
18905
0
            low_bits = USE_BBS;
18906
0
            len = -len;
18907
0
        }
18908
18909
        /* Get number of digits required to handle required number of bytes. */
18910
954
        digits = ((unsigned int)len + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
18911
        /* Ensure result has space. */
18912
954
        if (r->size < digits) {
18913
41
            err = MP_VAL;
18914
41
        }
18915
954
    }
18916
18917
986
    if (err == MP_OKAY) {
18918
    #ifndef WOLFSSL_SP_MATH_ALL
18919
        /* For minimal maths, support only what's in SP and needed for DH. */
18920
    #if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
18921
        if (len == 32) {
18922
        }
18923
        else
18924
    #endif /* WOLFSSL_HAVE_SP_DH && WOLFSSL_KEY_GEN */
18925
        /* Generate RSA primes that are half the modulus length. */
18926
    #ifdef WOLFSSL_SP_4096
18927
        if (len == 256) {
18928
            /* Support 2048-bit operations compiled in. */
18929
        }
18930
        else
18931
    #endif
18932
    #ifndef WOLFSSL_SP_NO_3072
18933
        if (len == 192) {
18934
            /* Support 1536-bit operations compiled in. */
18935
        }
18936
        else
18937
    #endif
18938
    #ifndef WOLFSSL_SP_NO_2048
18939
        if (len == 128) {
18940
            /* Support 1024-bit operations compiled in. */
18941
        }
18942
        else
18943
    #endif
18944
        {
18945
            /* Bit length not supported in SP. */
18946
            err = MP_VAL;
18947
        }
18948
    #endif /* !WOLFSSL_SP_MATH_ALL */
18949
18950
913
    #ifdef WOLFSSL_SP_INT_NEGATIVE
18951
        /* Generated number is always positive. */
18952
913
        r->sign = MP_ZPOS;
18953
913
    #endif /* WOLFSSL_SP_INT_NEGATIVE */
18954
        /* Set number of digits that will be used. */
18955
913
        r->used = (sp_size_t)digits;
18956
913
    #if defined(WOLFSSL_SP_MATH_ALL) || defined(BIG_ENDIAN_ORDER)
18957
        /* Calculate number of bits in last digit. */
18958
913
        bits = (len * 8) & SP_WORD_MASK;
18959
913
    #endif /* WOLFSSL_SP_MATH_ALL || BIG_ENDIAN_ORDER */
18960
913
    }
18961
18962
    /* Assume the candidate is probably prime and then test until it is proven
18963
     * composite.
18964
     */
18965
35.0k
    while ((err == MP_OKAY) && (isPrime == MP_NO)) {
18966
#ifdef SHOW_GEN
18967
        printf(".");
18968
        fflush(stdout);
18969
#endif /* SHOW_GEN */
18970
        /* Generate bytes into digit array. */
18971
34.2k
        err = wc_RNG_GenerateBlock(rng, (byte*)r->dp, (word32)len);
18972
34.2k
        if (err != 0) {
18973
155
            err = MP_VAL;
18974
155
            break;
18975
155
        }
18976
18977
        /* Set top bits to ensure bit length required is generated.
18978
         * Also set second top to help ensure product of two primes is
18979
         * going to be twice the number of bits of each.
18980
         */
18981
34.1k
#ifdef LITTLE_ENDIAN_ORDER
18982
34.1k
        ((byte*)r->dp)[len-1]             |= 0x80 | 0x40;
18983
#else
18984
        ((byte*)(r->dp + r->used - 1))[0] |= 0x80 | 0x40;
18985
#endif /* LITTLE_ENDIAN_ORDER */
18986
18987
#ifdef BIG_ENDIAN_ORDER
18988
        /* Bytes were put into wrong place when less than full digit. */
18989
        if (bits != 0) {
18990
            r->dp[r->used - 1] >>= SP_WORD_SIZE - bits;
18991
        }
18992
#endif /* BIG_ENDIAN_ORDER */
18993
34.1k
#ifdef WOLFSSL_SP_MATH_ALL
18994
        /* Mask top digit when less than a digit requested. */
18995
34.1k
        if (bits > 0) {
18996
18.5k
            r->dp[r->used - 1] &= ((sp_int_digit)1 << bits) - 1;
18997
18.5k
        }
18998
34.1k
#endif /* WOLFSSL_SP_MATH_ALL */
18999
        /* Set mandatory low bits
19000
         *  - bottom bit to make odd.
19001
         *  - For BBS, second lowest too to make Blum integer (3 mod 4).
19002
         */
19003
34.1k
        r->dp[0] |= low_bits;
19004
19005
        /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
19006
         * of a 1024-bit candidate being a false positive, when it is our
19007
         * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
19008
         */
19009
34.1k
        err = sp_prime_is_prime_ex(r, WOLFSSL_SP_MILLER_RABIN_CNT, &isPrime,
19010
34.1k
            rng);
19011
34.1k
    }
19012
19013
986
    return err;
19014
986
}
19015
#endif /* WOLFSSL_KEY_GEN && (!NO_RSA || !NO_DH || !NO_DSA) && !WC_NO_RNG */
19016
19017
#ifdef WOLFSSL_SP_PRIME_GEN
19018
/* Miller-Rabin test of "a" to the base of "b" as described in
19019
 * HAC pp. 139 Algorithm 4.24
19020
 *
19021
 * Sets result to 0 if definitely composite or 1 if probably prime.
19022
 * Randomly the chance of error is no more than 1/4 and often
19023
 * very much lower.
19024
 *
19025
 * a is assumed to be odd.
19026
 *
19027
 * @param  [in]   a       SP integer to check.
19028
 * @param  [in]   b       SP integer that is a small prime. Overwritten.
19029
 * @param  [out]  result  MP_YES when number is likely prime.
19030
 *                        MP_NO otherwise.
19031
 * @param  [in]   n1      SP integer temporary.
19032
 * @param  [in]   r       SP integer temporary.
19033
 *
19034
 * @return  MP_OKAY on success.
19035
 * @return  MP_MEM when dynamic memory allocation fails.
19036
 */
19037
static int sp_prime_miller_rabin(const sp_int* a, sp_int* b, int* result,
19038
    sp_int* n1, sp_int* r)
19039
101k
{
19040
101k
    int err = MP_OKAY;
19041
101k
    int s = 0;
19042
101k
    sp_int* y = b;
19043
19044
    /* Assume not prime. */
19045
101k
    *result = MP_NO;
19046
19047
    /* Ensure small prime is 2 or more. */
19048
101k
    if (sp_cmp_d(b, 1) != MP_GT) {
19049
0
        err = MP_VAL;
19050
0
    }
19051
101k
    if (err == MP_OKAY) {
19052
        /* n1 = a - 1 (a is assumed odd.) */
19053
101k
        (void)sp_copy(a, n1);
19054
101k
        n1->dp[0]--;
19055
19056
        /* Set 2**s * r = n1 */
19057
        /* Count the number of least significant bits which are zero. */
19058
101k
        s = sp_cnt_lsb(n1);
19059
        /* Divide n - 1 by 2**s into r. */
19060
101k
        (void)sp_rshb(n1, s, r);
19061
19062
        /* Compute y = b**r mod a */
19063
101k
        err = sp_exptmod(b, r, a, y);
19064
101k
    }
19065
101k
    if (err == MP_OKAY) {
19066
        /* Assume probably prime until shown otherwise. */
19067
101k
        *result = MP_YES;
19068
19069
        /* If y != 1 and y != n1 do */
19070
101k
        if ((sp_cmp_d(y, 1) != MP_EQ) && (_sp_cmp(y, n1) != MP_EQ)) {
19071
63.5k
            int j = 1;
19072
            /* While j <= s-1 and y != n1 */
19073
667k
            while ((j <= (s - 1)) && (_sp_cmp(y, n1) != MP_EQ)) {
19074
                /* Square for bit shifted down. */
19075
603k
                err = sp_sqrmod(y, a, y);
19076
603k
                if (err != MP_OKAY) {
19077
30
                    break;
19078
30
                }
19079
19080
                /* If y == 1 then composite. */
19081
603k
                if (sp_cmp_d(y, 1) == MP_EQ) {
19082
0
                    *result = MP_NO;
19083
0
                    break;
19084
0
                }
19085
603k
                ++j;
19086
603k
            }
19087
19088
            /* If y != n1 then composite. */
19089
63.5k
            if ((*result == MP_YES) && (_sp_cmp(y, n1) != MP_EQ)) {
19090
5.77k
                *result = MP_NO;
19091
5.77k
            }
19092
63.5k
        }
19093
101k
    }
19094
19095
101k
    return err;
19096
101k
}
19097
19098
#if SP_WORD_SIZE == 8
19099
/* Number of pre-computed primes. First n primes - fitting in a digit. */
19100
278k
#define SP_PRIME_SIZE      54
19101
19102
static const sp_int_digit sp_primes[SP_PRIME_SIZE] = {
19103
    0x02, 0x03, 0x05, 0x07, 0x0B, 0x0D, 0x11, 0x13,
19104
    0x17, 0x1D, 0x1F, 0x25, 0x29, 0x2B, 0x2F, 0x35,
19105
    0x3B, 0x3D, 0x43, 0x47, 0x49, 0x4F, 0x53, 0x59,
19106
    0x61, 0x65, 0x67, 0x6B, 0x6D, 0x71, 0x7F, 0x83,
19107
    0x89, 0x8B, 0x95, 0x97, 0x9D, 0xA3, 0xA7, 0xAD,
19108
    0xB3, 0xB5, 0xBF, 0xC1, 0xC5, 0xC7, 0xD3, 0xDF,
19109
    0xE3, 0xE5, 0xE9, 0xEF, 0xF1, 0xFB
19110
};
19111
#else
19112
/* Number of pre-computed primes. First n primes. */
19113
#define SP_PRIME_SIZE      256
19114
19115
/* The first 256 primes. */
19116
static const sp_uint16 sp_primes[SP_PRIME_SIZE] = {
19117
    0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
19118
    0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
19119
    0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
19120
    0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
19121
    0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
19122
    0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
19123
    0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
19124
    0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
19125
19126
    0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
19127
    0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
19128
    0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
19129
    0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
19130
    0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
19131
    0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
19132
    0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
19133
    0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
19134
19135
    0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
19136
    0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
19137
    0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
19138
    0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
19139
    0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
19140
    0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
19141
    0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
19142
    0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
19143
19144
    0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
19145
    0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
19146
    0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
19147
    0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
19148
    0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
19149
    0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
19150
    0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
19151
    0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
19152
};
19153
#endif
19154
19155
/* Compare the first n primes with a.
19156
 *
19157
 * @param [in]  a       Number to check.
19158
 * @param [out] result  Whether number was found to be prime.
19159
 * @return  0 when no small prime matches.
19160
 * @return  1 when small prime matches.
19161
 */
19162
static WC_INLINE int sp_cmp_primes(const sp_int* a, int* result)
19163
850
{
19164
850
    int i;
19165
850
    int haveRes = 0;
19166
19167
850
    *result = MP_NO;
19168
    /* Check one digit a against primes table. */
19169
126k
    for (i = 0; i < SP_PRIME_SIZE; i++) {
19170
126k
        if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) {
19171
277
            *result = MP_YES;
19172
277
            haveRes = 1;
19173
277
            break;
19174
277
        }
19175
126k
    }
19176
19177
850
    return haveRes;
19178
850
}
19179
19180
/* Using composites is only faster when using 64-bit values. */
19181
#if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE == 64)
19182
/* Number of composites. */
19183
#define SP_COMP_CNT     38
19184
19185
/* Products of small primes that fit into 64-bits. */
19186
static sp_int_digit sp_comp[SP_COMP_CNT] = {
19187
    0x088886ffdb344692, 0x34091fa96ffdf47b, 0x3c47d8d728a77ebb,
19188
    0x077ab7da9d709ea9, 0x310df3e7bd4bc897, 0xe657d7a1fd5161d1,
19189
    0x02ad3dbe0cca85ff, 0x0787f9a02c3388a7, 0x1113c5cc6d101657,
19190
    0x2456c94f936bdb15, 0x4236a30b85ffe139, 0x805437b38eada69d,
19191
    0x00723e97bddcd2af, 0x00a5a792ee239667, 0x00e451352ebca269,
19192
    0x013a7955f14b7805, 0x01d37cbd653b06ff, 0x0288fe4eca4d7cdf,
19193
    0x039fddb60d3af63d, 0x04cd73f19080fb03, 0x0639c390b9313f05,
19194
    0x08a1c420d25d388f, 0x0b4b5322977db499, 0x0e94c170a802ee29,
19195
    0x11f6a0e8356100df, 0x166c8898f7b3d683, 0x1babda0a0afd724b,
19196
    0x2471b07c44024abf, 0x2d866dbc2558ad71, 0x3891410d45fb47df,
19197
    0x425d5866b049e263, 0x51f767298e2cf13b, 0x6d9f9ece5fc74f13,
19198
    0x7f5ffdb0f56ee64d, 0x943740d46a1bc71f, 0xaf2d7ca25cec848f,
19199
    0xcec010484e4ad877, 0xef972c3cfafbcd25
19200
};
19201
19202
/* Index of next prime after those used to create composite. */
19203
static int sp_comp_idx[SP_COMP_CNT] = {
19204
     15,  25,  34,  42,  50,  58,  65,  72,  79,  86,  93, 100, 106, 112, 118,
19205
    124, 130, 136, 142, 148, 154, 160, 166, 172, 178, 184, 190, 196, 202, 208,
19206
    214, 220, 226, 232, 238, 244, 250, 256
19207
};
19208
#endif
19209
19210
/* Determines whether any of the first n small primes divide a evenly.
19211
 *
19212
 * @param [in]      a        Number to check.
19213
 * @param [in, out] haveRes  Boolean indicating a not-prime result was found.
19214
 * @param [in, out] result   Whether a is known to be prime.
19215
 * @return  MP_OKAY on success.
19216
 * @return  Negative on failure.
19217
 */
19218
static WC_INLINE int sp_div_primes(const sp_int* a, int* haveRes, int* result)
19219
7.03k
{
19220
7.03k
    int i;
19221
#if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE == 64)
19222
    int j;
19223
#endif
19224
7.03k
    sp_int_digit d;
19225
7.03k
    int err = MP_OKAY;
19226
19227
7.03k
#if defined(WOLFSSL_SP_SMALL) || (SP_WORD_SIZE < 64)
19228
    /* Do trial division of a with all known small primes. */
19229
126k
    for (i = 0; i < SP_PRIME_SIZE; i++) {
19230
        /* Small prime divides a when remainder is 0. */
19231
124k
        err = sp_mod_d(a, (sp_int_digit)sp_primes[i], &d);
19232
124k
        if ((err != MP_OKAY) || (d == 0)) {
19233
5.34k
            *result = MP_NO;
19234
5.34k
            *haveRes = 1;
19235
5.34k
            break;
19236
5.34k
        }
19237
124k
    }
19238
#else
19239
    /* Start with first prime in composite. */
19240
    i = 0;
19241
    for (j = 0; (!(*haveRes)) && (j < SP_COMP_CNT); j++) {
19242
        /* Reduce a down to a single word.  */
19243
        err = sp_mod_d(a, sp_comp[j], &d);
19244
        if ((err != MP_OKAY) || (d == 0)) {
19245
            *result = MP_NO;
19246
            *haveRes = 1;
19247
            break;
19248
        }
19249
        /* Do trial division of d with small primes that make up composite. */
19250
        for (; i < sp_comp_idx[j]; i++) {
19251
            /* Small prime divides a when remainder is 0. */
19252
            if (d % sp_primes[i] == 0) {
19253
                *result = MP_NO;
19254
                *haveRes = 1;
19255
                break;
19256
            }
19257
        }
19258
    }
19259
#endif
19260
19261
7.03k
    return err;
19262
7.03k
}
19263
19264
/* Check whether a is prime by checking t iterations of Miller-Rabin.
19265
 *
19266
 * @param  [in]   a       SP integer to check.
19267
 * @param  [in]   trials  Number of trials of Miller-Rabin test to perform.
19268
 * @param  [out]  result  MP_YES when number is prime.
19269
 *                        MP_NO otherwise.
19270
 *
19271
 * @return  MP_OKAY on success.
19272
 * @return  MP_MEM when dynamic memory allocation fails.
19273
 */
19274
static int _sp_prime_trials(const sp_int* a, int trials, int* result)
19275
0
{
19276
0
    int err = MP_OKAY;
19277
0
    int i;
19278
0
    DECL_SP_INT(n1, a->used + 1);
19279
0
    DECL_SP_INT(r, a->used + 1);
19280
0
    DECL_SP_INT(b, a->used * 2 + 1);
19281
19282
0
    ALLOC_SP_INT(n1, a->used + 1, err, NULL);
19283
0
    ALLOC_SP_INT(r, a->used + 1, err, NULL);
19284
    /* Allocate number that will hold modular exponentiation result. */
19285
0
    ALLOC_SP_INT(b, a->used * 2 + 1, err, NULL);
19286
0
    if (err == MP_OKAY) {
19287
0
        _sp_init_size(n1, a->used + 1U);
19288
0
        _sp_init_size(r, a->used + 1U);
19289
0
        _sp_init_size(b, (sp_size_t)(a->used * 2U + 1U));
19290
19291
        /* Do requested number of trials of Miller-Rabin test. */
19292
0
        for (i = 0; i < trials; i++) {
19293
            /* Miller-Rabin test with known small prime. */
19294
0
            _sp_set(b, sp_primes[i]);
19295
0
            err = sp_prime_miller_rabin(a, b, result, n1, r);
19296
0
            if ((err != MP_OKAY) || (*result == MP_NO)) {
19297
0
                break;
19298
0
            }
19299
0
        }
19300
19301
        /* Clear temporary values. */
19302
0
        sp_clear(n1);
19303
0
        sp_clear(r);
19304
0
        sp_clear(b);
19305
0
    }
19306
19307
    /* Free allocated temporary. */
19308
0
    FREE_SP_INT(b, NULL);
19309
0
    FREE_SP_INT(r, NULL);
19310
0
    FREE_SP_INT(n1, NULL);
19311
0
    return err;
19312
0
}
19313
19314
/* Check whether a is prime.
19315
 * Checks against a number of small primes and does t iterations of
19316
 * Miller-Rabin.
19317
 *
19318
 * @param  [in]   a       SP integer to check.
19319
 * @param  [in]   trials  Number of trials of Miller-Rabin test to perform.
19320
 * @param  [out]  result  MP_YES when number is prime.
19321
 *                        MP_NO otherwise.
19322
 *
19323
 * @return  MP_OKAY on success.
19324
 * @return  MP_VAL when a or result is NULL, or trials is out of range.
19325
 * @return  MP_MEM when dynamic memory allocation fails.
19326
 */
19327
int sp_prime_is_prime(const sp_int* a, int trials, int* result)
19328
0
{
19329
0
    int         err = MP_OKAY;
19330
0
    int         haveRes = 0;
19331
19332
    /* Validate parameters. */
19333
0
    if ((a == NULL) || (result == NULL)) {
19334
0
        if (result != NULL) {
19335
0
            *result = MP_NO;
19336
0
        }
19337
0
        err = MP_VAL;
19338
0
    }
19339
0
    else if (a->used * 2 >= SP_INT_DIGITS) {
19340
0
        err = MP_VAL;
19341
0
    }
19342
    /* Check validity of Miller-Rabin iterations count.
19343
     * Must do at least one and need a unique pre-computed prime for each
19344
     * iteration.
19345
     */
19346
0
    if ((err == MP_OKAY) && ((trials <= 0) || (trials > SP_PRIME_SIZE))) {
19347
0
        *result = MP_NO;
19348
0
        err = MP_VAL;
19349
0
    }
19350
19351
    /* Short-cut, 1 is not prime. */
19352
0
    if ((err == MP_OKAY) && sp_isone(a)) {
19353
0
        *result = MP_NO;
19354
0
        haveRes = 1;
19355
0
    }
19356
19357
0
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
19358
19359
    /* Check against known small primes when a has 1 digit. */
19360
0
    if ((err == MP_OKAY) && (!haveRes) && (a->used == 1) &&
19361
0
            (a->dp[0] <= sp_primes[SP_PRIME_SIZE - 1])) {
19362
0
        haveRes = sp_cmp_primes(a, result);
19363
0
    }
19364
19365
    /* Check all small primes for even divisibility. */
19366
0
    if ((err == MP_OKAY) && (!haveRes)) {
19367
0
        err = sp_div_primes(a, &haveRes, result);
19368
0
    }
19369
19370
    /* Check a number of iterations of Miller-Rabin with small primes. */
19371
0
    if ((err == MP_OKAY) && (!haveRes)) {
19372
0
        err = _sp_prime_trials(a, trials, result);
19373
0
    }
19374
19375
0
    RESTORE_VECTOR_REGISTERS();
19376
19377
0
    return err;
19378
0
}
19379
19380
#ifndef WC_NO_RNG
19381
/* Check whether a is prime by doing t iterations of Miller-Rabin.
19382
 *
19383
 * t random bases should give at most a (1/4)^t chance of a false prime.
19384
 *
19385
 * @param  [in]   a       SP integer to check.
19386
 * @param  [in]   trials  Number of iterations of Miller-Rabin test to perform.
19387
 * @param  [out]  result  MP_YES when number is prime.
19388
 *                        MP_NO otherwise.
19389
 * @param  [in]   rng     Random number generator for Miller-Rabin testing.
19390
 *
19391
 * @return  MP_OKAY on success.
19392
 * @return  Error code when random number generation or Miller-Rabin fails.
19393
 * @return  MP_MEM when dynamic memory allocation fails.
19394
 */
19395
static int _sp_prime_random_trials(const sp_int* a, int trials, int* result,
19396
    WC_RNG* rng)
19397
7.46k
{
19398
7.46k
    int err = MP_OKAY;
19399
7.46k
    int bits = sp_count_bits(a);
19400
7.46k
    word32 baseSz = ((word32)bits + 7) >> 3;
19401
7.46k
    DECL_SP_INT_ARRAY(ds, a->used + 1, 2);
19402
7.46k
    DECL_SP_INT_ARRAY(d, a->used * 2 + 1, 2);
19403
19404
7.46k
    ALLOC_SP_INT_ARRAY(ds, a->used + 1, 2, err, NULL);
19405
7.46k
    ALLOC_SP_INT_ARRAY(d, a->used * 2 + 1, 2, err, NULL);
19406
7.46k
    if (err == MP_OKAY) {
19407
7.38k
        sp_int* c  = ds[0];
19408
7.38k
        sp_int* n1 = ds[1];
19409
7.38k
        sp_int* b  = d[0];
19410
7.38k
        sp_int* r  = d[1];
19411
19412
7.38k
        _sp_init_size(c , a->used + 1U);
19413
7.38k
        _sp_init_size(n1, a->used + 1U);
19414
7.38k
        _sp_init_size(b , (sp_size_t)(a->used * 2U + 1U));
19415
7.38k
        _sp_init_size(r , (sp_size_t)(a->used * 2U + 1U));
19416
19417
7.38k
        _sp_sub_d(a, 2, c);
19418
19419
7.38k
        bits &= SP_WORD_MASK;
19420
19421
        /* Keep trying random numbers until all trials complete. */
19422
145k
        while (trials > 0) {
19423
            /* Generate random trial number. */
19424
144k
            err = wc_RNG_GenerateBlock(rng, (byte*)b->dp, baseSz);
19425
144k
            if (err != MP_OKAY) {
19426
89
                break;
19427
89
            }
19428
144k
            b->used = a->used;
19429
        #ifdef BIG_ENDIAN_ORDER
19430
            /* Fix top digit if fewer bytes than a full digit generated. */
19431
            if (((baseSz * 8) & SP_WORD_MASK) != 0) {
19432
                b->dp[b->used-1] >>=
19433
                    SP_WORD_SIZE - ((baseSz * 8) & SP_WORD_MASK);
19434
            }
19435
        #endif /* BIG_ENDIAN_ORDER */
19436
19437
            /* Ensure the top word has no more bits than necessary. */
19438
144k
            if (bits > 0) {
19439
107k
                b->dp[b->used - 1] &= ((sp_int_digit)1 << bits) - 1;
19440
107k
                sp_clamp(b);
19441
107k
            }
19442
19443
            /* Can't use random value when it is: 0, 1, 2, a-2, a-1, >= a */
19444
144k
            if ((sp_cmp_d(b, 2) != MP_GT) || (_sp_cmp(b, c) != MP_LT)) {
19445
42.5k
                continue;
19446
42.5k
            }
19447
19448
            /* Perform Miller-Rabin test with random value. */
19449
101k
            err = sp_prime_miller_rabin(a, b, result, n1, r);
19450
101k
            if ((err != MP_OKAY) || (*result == MP_NO)) {
19451
6.01k
                break;
19452
6.01k
            }
19453
19454
            /* Trial complete. */
19455
95.9k
            trials--;
19456
95.9k
        }
19457
19458
        /* Zeroize temporary values used when generating private prime. */
19459
7.38k
        sp_forcezero(n1);
19460
7.38k
        sp_forcezero(r);
19461
7.38k
        sp_forcezero(b);
19462
7.38k
        sp_forcezero(c);
19463
7.38k
    }
19464
19465
7.46k
    FREE_SP_INT_ARRAY(d, NULL);
19466
7.46k
    FREE_SP_INT_ARRAY(ds, NULL);
19467
7.46k
    return err;
19468
7.46k
}
19469
#endif /*!WC_NO_RNG */
19470
19471
/* Check whether a is prime.
19472
 * Checks against a number of small primes and does t iterations of
19473
 * Miller-Rabin.
19474
 *
19475
 * @param  [in]   a       SP integer to check.
19476
 * @param  [in]   trials  Number of iterations of Miller-Rabin test to perform.
19477
 * @param  [out]  result  MP_YES when number is prime.
19478
 *                        MP_NO otherwise.
19479
 * @param  [in]   rng     Random number generator for Miller-Rabin testing.
19480
 *
19481
 * @return  MP_OKAY on success.
19482
 * @return  MP_VAL when a, result or rng is NULL, or a or trials is invalid.
19483
 * @return  MP_MEM when dynamic memory allocation fails.
19484
 */
19485
int sp_prime_is_prime_ex(const sp_int* a, int trials, int* result, WC_RNG* rng)
19486
23.1k
{
19487
23.1k
    int err = MP_OKAY;
19488
23.1k
    int ret = MP_YES;
19489
23.1k
    int haveRes = 0;
19490
19491
23.1k
    if ((a == NULL) || (result == NULL) || (rng == NULL)) {
19492
0
        err = MP_VAL;
19493
0
    }
19494
23.1k
#ifndef WC_NO_RNG
19495
23.1k
    if ((err == MP_OKAY) && (a->used * 2 >= SP_INT_DIGITS)) {
19496
3
        err = MP_VAL;
19497
3
    }
19498
23.1k
#endif
19499
23.1k
#ifdef WOLFSSL_SP_INT_NEGATIVE
19500
23.1k
    if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
19501
16
        err = MP_VAL;
19502
16
    }
19503
23.1k
#endif
19504
19505
    /* Ensure trials is valid. Maximum based on number of small primes
19506
     * available. */
19507
23.1k
    if ((err == MP_OKAY) && ((trials <= 0) || (trials > SP_PRIME_SIZE))) {
19508
16
        err = MP_VAL;
19509
16
    }
19510
19511
23.1k
    if ((err == MP_OKAY) && sp_isone(a)) {
19512
11
        ret = MP_NO;
19513
11
        haveRes = 1;
19514
11
    }
19515
19516
23.1k
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
19517
19518
    /* Check against known small primes when a has 1 digit. */
19519
23.1k
    if ((err == MP_OKAY) && (!haveRes) && (a->used == 1) &&
19520
2.39k
            (a->dp[0] <= (sp_int_digit)sp_primes[SP_PRIME_SIZE - 1])) {
19521
399
        haveRes = sp_cmp_primes(a, &ret);
19522
399
    }
19523
19524
    /* Check all small primes for even divisibility. */
19525
23.1k
    if ((err == MP_OKAY) && (!haveRes)) {
19526
22.9k
        err = sp_div_primes(a, &haveRes, &ret);
19527
22.9k
    }
19528
19529
23.1k
#ifndef WC_NO_RNG
19530
    /* Check a number of iterations of Miller-Rabin with random large values. */
19531
23.1k
    if ((err == MP_OKAY) && (!haveRes)) {
19532
4.67k
        err = _sp_prime_random_trials(a, trials, &ret, rng);
19533
4.67k
    }
19534
#else
19535
    (void)trials;
19536
#endif /* !WC_NO_RNG */
19537
19538
23.1k
    if (result != NULL) {
19539
23.1k
        *result = ret;
19540
23.1k
    }
19541
19542
23.1k
    RESTORE_VECTOR_REGISTERS();
19543
19544
23.1k
    return err;
19545
23.1k
}
19546
#endif /* WOLFSSL_SP_PRIME_GEN */
19547
19548
#if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)
19549
19550
/* Calculates the Greatest Common Divisor (GCD) of a and b into r.
19551
 *
19552
 * Find the largest number that divides both a and b without remainder.
19553
 * r <= a, r <= b, a % r == 0, b % r == 0
19554
 *
19555
 * a and b are positive integers.
19556
 *
19557
 * Euclidean Algorithm:
19558
 *  1. If a > b then swap a and b
19559
 *  2. u = a
19560
 *  3. v = b % a
19561
 *  4. While v != 0
19562
 *   4.1. t = u % v
19563
 *   4.2. u <= v, v <= t, t <= u
19564
 *  5. r = u
19565
 *
19566
 * @param  [in]   a  SP integer of first operand.
19567
 * @param  [in]   b  SP integer of second operand.
19568
 * @param  [out]  r  SP integer to hold result.
19569
 *
19570
 * @return  MP_OKAY on success.
19571
 * @return  MP_MEM when dynamic memory allocation fails.
19572
 */
19573
static WC_INLINE int _sp_gcd(const sp_int* a, const sp_int* b, sp_int* r)
19574
1.36k
{
19575
1.36k
    int err = MP_OKAY;
19576
1.36k
    sp_int* u = NULL;
19577
1.36k
    sp_int* v = NULL;
19578
1.36k
    sp_int* t = NULL;
19579
    /* Used for swapping sp_ints. */
19580
1.36k
    sp_int* s;
19581
    /* Determine maximum digit length numbers will reach. */
19582
1.36k
    unsigned int used = (a->used >= b->used) ? a->used + 1U : b->used + 1U;
19583
1.36k
    DECL_SP_INT_ARRAY(d, used, 3);
19584
19585
1.36k
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
19586
19587
1.36k
    ALLOC_SP_INT_ARRAY(d, used, 3, err, NULL);
19588
1.36k
    if (err == MP_OKAY) {
19589
1.34k
        u = d[0];
19590
1.34k
        v = d[1];
19591
1.34k
        t = d[2];
19592
19593
1.34k
        _sp_init_size(u, used);
19594
1.34k
        _sp_init_size(v, used);
19595
1.34k
        _sp_init_size(t, used);
19596
19597
        /* 1. If a > b then a = b, b = a.
19598
         *    Make a <= b.
19599
         */
19600
1.34k
        if (_sp_cmp(a, b) == MP_GT) {
19601
665
            const sp_int* tmp;
19602
665
            tmp = a;
19603
665
            a = b;
19604
665
            b = tmp;
19605
665
        }
19606
        /* 2. u = a */
19607
1.34k
        _sp_copy(a, u);
19608
        /* 3. v = b mod a */
19609
1.34k
        if (a->used == 1) {
19610
851
            err = sp_mod_d(b, a->dp[0], &v->dp[0]);
19611
851
            v->used = (v->dp[0] != 0);
19612
851
        }
19613
497
        else {
19614
497
            err = sp_mod(b, a, v);
19615
497
        }
19616
1.34k
    }
19617
19618
    /* 4. While v != 0 */
19619
    /* Keep reducing larger by smaller until smaller is 0 or u and v both one
19620
     * digit.
19621
     */
19622
36.3k
    while ((err == MP_OKAY) && (!sp_iszero(v)) && (u->used > 1)) {
19623
        /* u' = v, v' = u mod v */
19624
        /* 4.1 t = u mod v */
19625
35.0k
        if (v->used == 1) {
19626
474
            err = sp_mod_d(u, v->dp[0], &t->dp[0]);
19627
474
            t->used = (t->dp[0] != 0);
19628
474
        }
19629
34.5k
        else {
19630
34.5k
            err = sp_mod(u, v, t);
19631
34.5k
        }
19632
        /* 4.2. u <= v, v <= t, t <= u */
19633
35.0k
        s = u; u = v; v = t; t = s;
19634
35.0k
    }
19635
    /* Only one digit remaining in u and v. */
19636
22.4k
    while ((err == MP_OKAY) && (!sp_iszero(v))) {
19637
        /* u' = v, v' = u mod v */
19638
        /* 4.1 t = u mod v */
19639
21.1k
        t->dp[0] = u->dp[0] % v->dp[0];
19640
21.1k
        t->used = (t->dp[0] != 0);
19641
        /* 4.2. u <= v, v <= t, t <= u */
19642
21.1k
        s = u; u = v; v = t; t = s;
19643
21.1k
    }
19644
1.36k
    if (err == MP_OKAY) {
19645
        /* 5. r = u */
19646
1.34k
        _sp_copy(u, r);
19647
1.34k
    }
19648
19649
1.36k
    FREE_SP_INT_ARRAY(d, NULL);
19650
19651
1.36k
    RESTORE_VECTOR_REGISTERS();
19652
19653
1.36k
    return err;
19654
1.36k
}
19655
19656
/* Calculates the Greatest Common Divisor (GCD) of a and b into r.
19657
 *
19658
 * Find the largest number that divides both a and b without remainder.
19659
 * r <= a, r <= b, a % r == 0, b % r == 0
19660
 *
19661
 * a and b are positive integers.
19662
 *
19663
 * @param  [in]   a  SP integer of first operand.
19664
 * @param  [in]   b  SP integer of second operand.
19665
 * @param  [out]  r  SP integer to hold result.
19666
 *
19667
 * @return  MP_OKAY on success.
19668
 * @return  MP_VAL when a, b or r is NULL, a or b is invalid, or r is too small.
19669
 * @return  MP_MEM when dynamic memory allocation fails.
19670
 */
19671
int sp_gcd(const sp_int* a, const sp_int* b, sp_int* r)
19672
690
{
19673
690
    int err = MP_OKAY;
19674
19675
    /* Validate parameters. */
19676
690
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
19677
0
        err = MP_VAL;
19678
0
    }
19679
    /* Check that we have space in numbers to do work. */
19680
690
    else if ((a->used >= SP_INT_DIGITS) || (b->used >= SP_INT_DIGITS)) {
19681
8
        err = MP_VAL;
19682
8
    }
19683
    /* Check that r is large enough to hold maximum sized result. */
19684
682
    else if (((a->used <= b->used) && (r->size < a->used)) ||
19685
678
             ((b->used < a->used) && (r->size < b->used))) {
19686
8
        err = MP_VAL;
19687
8
    }
19688
674
#ifdef WOLFSSL_SP_INT_NEGATIVE
19689
    /* Algorithm doesn't work with negative numbers. */
19690
674
    else if ((a->sign == MP_NEG) || (b->sign == MP_NEG)) {
19691
0
        err = MP_VAL;
19692
0
    }
19693
674
#endif
19694
674
    else if (sp_iszero(a)) {
19695
        /* GCD of 0 and 0 is undefined - all integers divide 0. */
19696
80
        if (sp_iszero(b)) {
19697
32
            err = MP_VAL;
19698
32
        }
19699
48
        else {
19700
            /* GCD of 0 and b is b - b divides 0. */
19701
48
            err = sp_copy(b, r);
19702
48
        }
19703
80
    }
19704
594
    else if (sp_iszero(b)) {
19705
        /* GCD of 0 and a is a - a divides 0. */
19706
42
        err = sp_copy(a, r);
19707
42
    }
19708
552
    else {
19709
        /* Calculate GCD. */
19710
552
        err = _sp_gcd(a, b, r);
19711
552
    }
19712
19713
690
    return err;
19714
690
}
19715
19716
#endif /* !NO_RSA && WOLFSSL_KEY_GEN */
19717
19718
#if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN) && \
19719
    (!defined(WC_RSA_BLINDING) || defined(HAVE_FIPS) || defined(HAVE_SELFTEST))
19720
19721
/* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
19722
 * Smallest number divisible by both numbers.
19723
 *
19724
 * a and b are positive integers.
19725
 *
19726
 * lcm(a, b) = (a / gcd(a, b)) * b
19727
 * Divide the common divisor from a and multiply by b.
19728
 *
19729
 * Algorithm:
19730
 *  1. t0 = gcd(a, b)
19731
 *  2. If a > b then
19732
 *   2.1. t1 = a / t0
19733
 *   2.2. r = b * t1
19734
 *  3. Else
19735
 *   3.1. t1 = b / t0
19736
 *   3.2. r = a * t1
19737
 *
19738
 * @param  [in]   a  SP integer of first operand.
19739
 * @param  [in]   b  SP integer of second operand.
19740
 * @param  [out]  r  SP integer to hold result.
19741
 *
19742
 * @return  MP_OKAY on success.
19743
 * @return  MP_MEM when dynamic memory allocation fails.
19744
 */
19745
static int _sp_lcm(const sp_int* a, const sp_int* b, sp_int* r)
19746
{
19747
    int err = MP_OKAY;
19748
    /* Determine maximum digit length numbers will reach. */
19749
    unsigned int used = ((a->used >= b->used) ? a->used + 1: b->used + 1);
19750
    DECL_SP_INT_ARRAY(t, used, 2);
19751
19752
    ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
19753
    if (err == MP_OKAY) {
19754
        _sp_init_size(t[0], used);
19755
        _sp_init_size(t[1], used);
19756
19757
        SAVE_VECTOR_REGISTERS(err = _svr_ret;);
19758
19759
        if (err == MP_OKAY) {
19760
            /* 1. t0 = gcd(a, b) */
19761
            err = sp_gcd(a, b, t[0]);
19762
        }
19763
19764
        if (err == MP_OKAY) {
19765
            /* Divide the greater by the common divisor and multiply by other
19766
             * to operate on the smallest length numbers.
19767
             */
19768
            /* 2. If a > b then */
19769
            if (_sp_cmp_abs(a, b) == MP_GT) {
19770
                /* 2.1. t1 = a / t0 */
19771
                err = sp_div(a, t[0], t[1], NULL);
19772
                if (err == MP_OKAY) {
19773
                    /* 2.2. r = b * t1 */
19774
                    err = sp_mul(b, t[1], r);
19775
                }
19776
            }
19777
            /* 3. Else */
19778
            else {
19779
                /* 3.1. t1 = b / t0 */
19780
                err = sp_div(b, t[0], t[1], NULL);
19781
                if (err == MP_OKAY) {
19782
                    /* 3.2. r = a * t1 */
19783
                    err = sp_mul(a, t[1], r);
19784
                }
19785
            }
19786
        }
19787
19788
        RESTORE_VECTOR_REGISTERS();
19789
    }
19790
19791
    FREE_SP_INT_ARRAY(t, NULL);
19792
    return err;
19793
}
19794
19795
/* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
19796
 * Smallest number divisible by both numbers.
19797
 *
19798
 * a and b are positive integers.
19799
 *
19800
 * @param  [in]   a  SP integer of first operand.
19801
 * @param  [in]   b  SP integer of second operand.
19802
 * @param  [out]  r  SP integer to hold result.
19803
 *
19804
 * @return  MP_OKAY on success.
19805
 * @return  MP_VAL on NULL argument, non-positive a or b, or r too small.
19806
 * @return  MP_MEM when dynamic memory allocation fails.
19807
 */
19808
int sp_lcm(const sp_int* a, const sp_int* b, sp_int* r)
19809
{
19810
    int err = MP_OKAY;
19811
19812
    /* Validate parameters. */
19813
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
19814
        err = MP_VAL;
19815
    }
19816
#ifdef WOLFSSL_SP_INT_NEGATIVE
19817
    /* Ensure a and b are positive. */
19818
    else if ((a->sign == MP_NEG) || (b->sign >= MP_NEG)) {
19819
        err = MP_VAL;
19820
    }
19821
#endif
19822
    /* Ensure r has space for maximal result. */
19823
    else if (r->size < a->used + b->used) {
19824
        err = MP_VAL;
19825
    }
19826
19827
    /* LCM of 0 and any number is undefined as 0 is not in the set of values
19828
     * being used.
19829
     */
19830
    if ((err == MP_OKAY) && (mp_iszero(a) || mp_iszero(b))) {
19831
        err = MP_VAL;
19832
    }
19833
19834
    if (err == MP_OKAY) {
19835
        /* Do operation. */
19836
        err = _sp_lcm(a, b, r);
19837
    }
19838
19839
    return err;
19840
}
19841
19842
#endif /* !NO_RSA && WOLFSSL_KEY_GEN && (!WC_RSA_BLINDING || HAVE_FIPS ||
19843
        * HAVE_SELFTEST) */
19844
19845
/* Returns the run time settings.
19846
 *
19847
 * @return  Settings value.
19848
 */
19849
word32 CheckRunTimeSettings(void)
19850
0
{
19851
0
    return CTC_SETTINGS;
19852
0
}
19853
19854
/* Returns the fast math settings.
19855
 *
19856
 * @return  Setting - number of bits in a digit.
19857
 */
19858
word32 CheckRunTimeFastMath(void)
19859
0
{
19860
0
    return SP_WORD_SIZE;
19861
0
}
19862
19863
#ifdef WOLFSSL_CHECK_MEM_ZERO
19864
/* Add an MP to check.
19865
 *
19866
 * @param [in] name  Name of address to check.
19867
 * @param [in] sp    sp_int that needs to be checked.
19868
 */
19869
void sp_memzero_add(const char* name, sp_int* sp)
19870
{
19871
    wc_MemZero_Add(name, sp->dp, sp->size * sizeof(sp_int_digit));
19872
}
19873
19874
/* Check the memory in the data pointer for memory that must be zero.
19875
 *
19876
 * @param [in] sp    sp_int that needs to be checked.
19877
 */
19878
void sp_memzero_check(sp_int* sp)
19879
{
19880
    wc_MemZero_Check(sp->dp, sp->size * sizeof(sp_int_digit));
19881
}
19882
#endif /* WOLFSSL_CHECK_MEM_ZERO */
19883
19884
#ifdef WOLFSSL_SP_DYN_STACK
19885
    PRAGMA_GCC_DIAG_POP
19886
#endif
19887
19888
#endif /* WOLFSSL_SP_MATH || WOLFSSL_SP_MATH_ALL */