Coverage Report

Created: 2026-05-16 06:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/wolfssl/wolfcrypt/src/sp_int.c
Line
Count
Source
1
/* sp_int.c
2
 *
3
 * Copyright (C) 2006-2026 wolfSSL Inc.
4
 *
5
 * This file is part of wolfSSL.
6
 *
7
 * wolfSSL is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 3 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * wolfSSL is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20
 */
21
22
/* Implementation by Sean Parkinson. */
23
24
/*
25
DESCRIPTION
26
This library provides single precision (SP) integer math functions.
27
28
*/
29
30
#include <wolfssl/wolfcrypt/libwolfssl_sources.h>
31
32
#if defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)
33
34
#ifdef NO_INLINE
35
    #include <wolfssl/wolfcrypt/misc.h>
36
#else
37
    #define WOLFSSL_MISC_INCLUDED
38
    #include <wolfcrypt/src/misc.c>
39
#endif
40
41
/* SP Build Options:
42
 * WOLFSSL_HAVE_SP_RSA:         Enable SP RSA support
43
 * WOLFSSL_HAVE_SP_DH:          Enable SP DH support
44
 * WOLFSSL_HAVE_SP_ECC:         Enable SP ECC support
45
 * WOLFSSL_SP_MATH:             Use only single precision math and algorithms
46
 *      it supports (no fastmath tfm.c or normal integer.c)
47
 * WOLFSSL_SP_MATH_ALL          Implementation of all MP functions
48
 *      (replacement for tfm.c and integer.c)
49
 * WOLFSSL_SP_SMALL:            Use smaller version of code and avoid large
50
 *      stack variables
51
 * WOLFSSL_SP_NO_MALLOC:        Always use stack, no heap XMALLOC/XFREE allowed
52
 * WOLFSSL_SP_NO_2048:          Disable RSA/DH 2048-bit support
53
 * WOLFSSL_SP_NO_3072:          Disable RSA/DH 3072-bit support
54
 * WOLFSSL_SP_4096:             Enable RSA/DH 4096-bit support
55
 * WOLFSSL_SP_NO_256            Disable ECC 256-bit SECP256R1 support
56
 * WOLFSSL_SP_384               Enable ECC 384-bit SECP384R1 support
57
 * WOLFSSL_SP_521               Enable ECC 521-bit SECP521R1 support
58
 * WOLFSSL_SP_ASM               Enable assembly speedups (detect platform)
59
 * WOLFSSL_SP_X86_64_ASM        Enable Intel x64 assembly implementation
60
 * WOLFSSL_SP_ARM32_ASM         Enable Aarch32 assembly implementation
61
 * WOLFSSL_SP_ARM64_ASM         Enable Aarch64 assembly implementation
62
 * WOLFSSL_SP_ARM_CORTEX_M_ASM  Enable Cortex-M assembly implementation
63
 * WOLFSSL_SP_ARM_THUMB_ASM     Enable ARM Thumb assembly implementation
64
 *      (used with -mthumb)
65
 * WOLFSSL_SP_X86_64            Enable Intel x86 64-bit assembly speedups
66
 * WOLFSSL_SP_X86               Enable Intel x86 assembly speedups
67
 * WOLFSSL_SP_ARM64             Enable Aarch64 assembly speedups
68
 * WOLFSSL_SP_ARM32             Enable ARM32 assembly speedups
69
 * WOLFSSL_SP_ARM32_UDIV        Enable word divide asm that uses UDIV instr
70
 * WOLFSSL_SP_ARM_THUMB         Enable ARM Thumb assembly speedups
71
 *                              (explicitly uses register 'r7')
72
 * WOLFSSL_SP_PPC64             Enable PPC64 assembly speedups
73
 * WOLFSSL_SP_PPC               Enable PPC assembly speedups
74
 * WOLFSSL_SP_MIPS64            Enable MIPS64 assembly speedups
75
 * WOLFSSL_SP_MIPS              Enable MIPS assembly speedups
76
 * WOLFSSL_SP_RISCV64           Enable RISCV64 assembly speedups
77
 * WOLFSSL_SP_RISCV32           Enable RISCV32 assembly speedups
78
 * WOLFSSL_SP_S390X             Enable S390X assembly speedups
79
 * SP_WORD_SIZE                 Force 32 or 64 bit mode
80
 * WOLFSSL_SP_NONBLOCK          Enables "non blocking" mode for SP math, which
81
 *      will return FP_WOULDBLOCK for long operations and function must be
82
 *      called again until complete.
83
 * WOLFSSL_SP_FAST_NCT_EXPTMOD  Enables the faster non-constant time modular
84
 *      exponentiation implementation.
85
 * WOLFSSL_SP_INT_NEGATIVE      Enables negative values to be used.
86
 * WOLFSSL_SP_INT_DIGIT_ALIGN   Enable when unaligned access of sp_int_digit
87
 *                              pointer is not allowed.
88
 * WOLFSSL_SP_NO_DYN_STACK      Disable use of dynamic stack items.
89
 *                              Dynamic arrays used when not small stack.
90
 * WOLFSSL_SP_FAST_MODEXP       Allow fast mod_exp with small C code
91
 * WOLFSSL_SP_LOW_MEM           Use algorithms that use less memory.
92
 * WOLFSSL_SMALL_STACK:         Use heap for large structures to reduce
93
 *      stack usage
94
 * WOLFSSL_KEY_GEN:             Key generation support enabled
95
 * WOLFSSL_RSA_PUBLIC_ONLY:     Only RSA public operations compiled in
96
 * WOLFSSL_RSA_VERIFY_ONLY:     Only RSA verify operations compiled in
97
 * NO_RSA:                      RSA support disabled
98
 * NO_DH:                       DH support disabled
99
 * NO_DSA:                      DSA support disabled
100
 * NO_INLINE:                   sp_int.c includes misc.h instead of including
101
 *      misc.c directly for inlining
102
 * HAVE_ECC:                    ECC support enabled, enables ECC-related SP
103
 *      functions
104
 * HAVE_FIPS:                   FIPS mode enabled
105
 * HAVE_WOLF_BIGINT:            wolfBigInt support, enables bigint conversion
106
 *      functions
107
 * FREESCALE_LTC_TFM:           Freescale LTC hardware acceleration replaces SP
108
 *      modular exponentiation
109
 * OPENSSL_EXTRA:               OpenSSL API compatibility enabled
110
 * OPENSSL_ALL:                 Full OpenSSL API compatibility enabled
111
 * WC_NO_HARDEN:                Disable timing attack resistance
112
 * WC_NO_CACHE_RESISTANT:       Disable cache-resistant (constant-address)
113
 *      operations
114
 * WC_NO_RNG:                   No RNG available, disables functions needing
115
 *      random numbers
116
 * WC_PROTECT_ENCRYPTED_MEM:    Enable protection of encrypted memory
117
 *      operations
118
 * WC_DISABLE_RADIX_ZERO_PAD:   Disable zero padding when converting to a
119
 *      radix string
120
 * WOLFSSL_NO_CT_OPS:           Disable constant-time operations
121
 * WOLFSSL_CHECK_MEM_ZERO:      Enable checking that sensitive memory is
122
 *      zeroed on free
123
 * WOLFSSL_SP_MILLER_RABIN_CNT: Number of Miller-Rabin rounds for prime
124
 *      testing (default: 8)
125
 * WOLFSSL_NO_ASM:              Disable all assembly implementations
126
 * WOLFSSL_KEIL:                Keil compiler in use, affects inline assembly
127
 *      syntax
128
 * WOLFSSL_USE_SAVE_VECTOR_REGISTERS: Save/restore vector registers around
129
 *      SP ASM calls
130
 * WOLFSSL_SP_INT_LARGE_COMBA:  Enable large Comba multiplication and
131
 *      squaring
132
 * WOLFSSL_SP_INT_SQR_VOLATILE: Declare squaring intermediate variables as
133
 *      volatile
134
 * SP_INT_NO_ASM:               Disable use of SP ASM even when
135
 *      SP_INT_ASM_AVAILABLE is set
136
 * SP_MATH_NEED_ADD_OFF:        Enable sp_add variant with an offset into
137
 *      the result
138
 *
139
 * The following are not user settable but are set in settings.h or sp_int.h
140
 * based on other defines and platform:
141
 * BIG_ENDIAN_ORDER:            (Auto) Set in types.h when WORDS_BIGENDIAN
142
 *      is defined by the platform or build system
143
 * LITTLE_ENDIAN_ORDER:         (Auto) Set in types.h when BIG_ENDIAN_ORDER
144
 *      is not defined; the default byte ordering
145
 * WOLFSSL_SP_DYN_STACK:        (Auto) Set in sp_int.h when C99 and
146
 *      conditions allow a dynamic stack sp_int
147
 * WOLFSSL_SP_DIV_WORD_HALF:    (Auto) Set in sp_int.h/settings.h when
148
 *      platform lacks a native double-word type
149
 * WOLFSSL_ARM_ARCH:            (Auto) Set in sp_int.h as alias for
150
 *      WOLFSSL_SP_ARM_ARCH; use WOLFSSL_SP_ARM_ARCH to configure
151
 * WOLFSSL_SP_ADD_D:            (Auto) Set in settings.h; enables sp_add_d
152
 *      based on which algorithms are active
153
 * WOLFSSL_SP_SUB_D:            (Auto) Set in settings.h; enables sp_sub_d
154
 *      based on which algorithms are active
155
 * WOLFSSL_SP_MUL_D:            (Auto) Set in settings.h; enables sp_mul_d
156
 *      based on which algorithms are active
157
 * WOLFSSL_SP_DIV_D:            (Auto) Set in sp_int.c; enables sp_div_d
158
 *      based on which algorithms are active
159
 * WOLFSSL_SP_MOD_D:            (Auto) Set in sp_int.c; enables sp_mod_d
160
 *      based on which algorithms are active
161
 * WOLFSSL_SP_INVMOD:           (Auto) Set in settings.h; enables
162
 *      sp_invmod based on which algorithms are active
163
 * WOLFSSL_SP_INVMOD_MONT_CT:   (Auto) Set in settings.h; enables
164
 *      constant-time Montgomery inverse when needed
165
 * WOLFSSL_SP_PRIME_GEN:        (Auto) Set in settings.h; enables prime
166
 *      generation based on which algorithms are active
167
 * WOLFSSL_SP_READ_RADIX_16:    (Auto) Set in settings.h; enables reading
168
 *      base-16 strings based on which algorithms are active
169
 * WOLFSSL_SP_READ_RADIX_10:    (Auto) Set in settings.h; enables reading
170
 *      base-10 strings based on which algorithms are active
171
 *
172
 * SP_ALLOC:                    (Internal) Heap allocation in use for SP
173
 *      variables in exptmod
174
 * SP_ALLOC_PREDEFINED:         (Internal) Set when SP_ALLOC was defined
175
 *      before this file
176
 * SP_INT_ASM_AVAILABLE:        (Internal) Set when a platform ASM
177
 *      implementation is present
178
 * SP_ASM_DIV_WORD:             (Internal) Platform macro: hardware
179
 *      double-word division available
180
 * SP_WORD_OVERFLOW:            (Internal) Set in sp_int.h when mul/sqr
181
 *      partial sums can overflow sp_int_word
182
 */
183
184
/* TODO: WOLFSSL_SP_SMALL is incompatible with clang-12+ -Os. */
185
#if defined(__clang__) && defined(__clang_major__) && \
186
    (__clang_major__ >= 12) && defined(WOLFSSL_SP_SMALL)
187
    #undef WOLFSSL_SP_SMALL
188
#endif
189
190
#include <wolfssl/wolfcrypt/sp_int.h>
191
192
#ifdef WOLFSSL_SP_DYN_STACK
193
/* We are statically declaring a variable smaller than sp_int.
194
 * We track available memory in the 'size' field.
195
 * Disable warnings of sp_int being partly outside array bounds of variable.
196
 */
197
    PRAGMA_GCC_DIAG_PUSH
198
    PRAGMA_GCC("GCC diagnostic ignored \"-Warray-bounds\"")
199
#endif
200
201
#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(WOLFSSL_SP_ASM)
202
    /* force off unneeded vector register save/restore. */
203
    #undef SAVE_VECTOR_REGISTERS
204
    #define SAVE_VECTOR_REGISTERS(fail_clause) \
205
        SAVE_NO_VECTOR_REGISTERS(fail_clause)
206
    #undef RESTORE_VECTOR_REGISTERS
207
    #define RESTORE_VECTOR_REGISTERS() RESTORE_NO_VECTOR_REGISTERS()
208
#endif
209
210
/* DECL_SP_INT: Declare one variable of type 'sp_int'. */
211
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
212
    !defined(WOLFSSL_SP_NO_MALLOC)
213
    /* Declare a variable that will be assigned a value on XMALLOC. */
214
    #define DECL_SP_INT(n, s)   \
215
        sp_int* n = NULL
216
#else
217
    #ifdef WOLFSSL_SP_DYN_STACK
218
        /* Declare a variable on the stack with the required data size. */
219
        #define DECL_SP_INT(n, s)                       \
220
0
            sp_int_digit n##d[MP_INT_SIZEOF_DIGITS(s)]; \
221
0
            sp_int* (n) = (sp_int*)n##d
222
    #else
223
        /* Declare a variable on the stack. */
224
        #define DECL_SP_INT(n, s)               \
225
            sp_int n[1]
226
    #endif
227
#endif
228
229
/* ALLOC_SP_INT: Allocate an 'sp_int' of required size. */
230
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
231
    !defined(WOLFSSL_SP_NO_MALLOC)
232
    /* Dynamically allocate just enough data to support size. */
233
    #define ALLOC_SP_INT(n, s, err, h)                                         \
234
    do {                                                                       \
235
        if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) {                     \
236
            (err) = MP_VAL;                                                    \
237
        }                                                                      \
238
        if ((err) == MP_OKAY) {                                                \
239
            (n) = (sp_int*)XMALLOC(MP_INT_SIZEOF(s), (h),                      \
240
                DYNAMIC_TYPE_BIGINT);                                          \
241
            if ((n) == NULL) {                                                 \
242
                (err) = MP_MEM;                                                \
243
            }                                                                  \
244
        }                                                                      \
245
    }                                                                          \
246
    while (0)
247
248
    /* Dynamically allocate just enough data to support size - and set size. */
249
    #define ALLOC_SP_INT_SIZE(n, s, err, h)                                    \
250
    do {                                                                       \
251
        ALLOC_SP_INT(n, s, err, h);                                            \
252
        if ((err) == MP_OKAY) {                                                \
253
            (n)->size = (sp_size_t)(s);                                        \
254
        }                                                                      \
255
    }                                                                          \
256
    while (0)
257
#else
258
    /* Array declared on stack - check size is valid. */
259
    #define ALLOC_SP_INT(n, s, err, h)                                         \
260
0
    do {                                                                       \
261
0
        if (((err) == MP_OKAY) && ((s) > (int)SP_INT_DIGITS)) {                \
262
0
            (err) = MP_VAL;                                                    \
263
0
        }                                                                      \
264
0
    }                                                                          \
265
0
    while (0)
266
267
    /* Array declared on stack - set the size field. */
268
    #define ALLOC_SP_INT_SIZE(n, s, err, h)                                    \
269
0
    do {                                                                       \
270
0
        ALLOC_SP_INT(n, s, err, h);                                            \
271
0
        if ((err) == MP_OKAY) {                                                \
272
0
            (n)->size = (sp_size_t)(s);                                        \
273
0
        }                                                                      \
274
0
    }                                                                          \
275
0
    while (0)
276
#endif
277
278
/* FREE_SP_INT: Free an 'sp_int' variable. */
279
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
280
    !defined(WOLFSSL_SP_NO_MALLOC)
281
    /* Free dynamically allocated data. */
282
    #define FREE_SP_INT(n, h)                   \
283
    do {                                        \
284
        if ((n) != NULL) {                      \
285
            XFREE(n, h, DYNAMIC_TYPE_BIGINT);   \
286
        }                                       \
287
    }                                           \
288
    while (0)
289
#else
290
    /* Nothing to do as declared on stack. */
291
0
    #define FREE_SP_INT(n, h) WC_DO_NOTHING
292
#endif
293
294
295
/* Declare a data pointer, set on XMALLOC, and a zeroed sp_int pointer array. */
296
#define DECL_DYN_SP_INT_ARRAY(n, s, c)               \
297
0
    sp_int* n##d = NULL;                             \
298
0
    sp_int* (n)[c];                                  \
299
0
    void *n ## _dummy_var = XMEMSET(n, 0, sizeof(n))
300
301
/* DECL_SP_INT_ARRAY: Declare array of 'sp_int'. */
302
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
303
    !defined(WOLFSSL_SP_NO_MALLOC)
304
    /* Declare a variable that will be assigned a value on XMALLOC. */
305
    #define DECL_SP_INT_ARRAY(n, s, c)  \
306
        DECL_DYN_SP_INT_ARRAY(n, s, c)
307
#elif defined(WOLFSSL_SP_DYN_STACK)
308
    /* Declare a variable on the stack with the required data size. */
309
    #define DECL_SP_INT_ARRAY(n, s, c)                    \
310
0
        sp_int_digit n##d[MP_INT_SIZEOF_DIGITS(s) * (c)]; \
311
0
        sp_int* (n)[c] = { NULL, }
312
#else
313
    /* Declare a variable on the stack. */
314
    #define DECL_SP_INT_ARRAY(n, s, c)      \
315
        sp_int n##d[c];                     \
316
        sp_int* (n)[c]
317
#endif
318
319
/* Dynamically allocate just enough data to support multiple sp_ints of the
320
 * required size. Use pointers into data to make up array and set sizes.
321
 */
322
0
#define ALLOC_DYN_SP_INT_ARRAY(n, s, c, err, h)                                \
323
0
do {                                                                           \
324
0
    (void)n ## _dummy_var;                                                     \
325
0
    if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) {                         \
326
0
        (err) = MP_VAL;                                                        \
327
0
    }                                                                          \
328
0
    if ((err) == MP_OKAY) {                                                    \
329
0
        n##d = (sp_int*)XMALLOC(MP_INT_SIZEOF(s) * (c), (h),                   \
330
0
                                                         DYNAMIC_TYPE_BIGINT); \
331
0
        if (n##d == NULL) {                                                    \
332
0
            (err) = MP_MEM;                                                    \
333
0
        }                                                                      \
334
0
        else {                                                                 \
335
0
            int n##ii;                                                         \
336
0
            (n)[0] = n##d;                                                     \
337
0
            (n)[0]->size = (sp_size_t)(s);                                     \
338
0
            for (n##ii = 1; n##ii < (int)(c); n##ii++) {                       \
339
0
                (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s);                     \
340
0
                (n)[n##ii]->size = (sp_size_t)(s);                             \
341
0
            }                                                                  \
342
0
        }                                                                      \
343
0
    }                                                                          \
344
0
}                                                                              \
345
0
while (0)
346
347
/* ALLOC_SP_INT_ARRAY: Allocate an array of 'sp_int's of required size. */
348
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
349
    !defined(WOLFSSL_SP_NO_MALLOC)
350
    #define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
351
        ALLOC_DYN_SP_INT_ARRAY(n, s, c, err, h)
352
#elif defined(WOLFSSL_SP_DYN_STACK)
353
    /* Data declared on stack that supports multiple sp_ints of the
354
     * required size. Use pointers into data to make up array and set sizes.
355
     */
356
    #define ALLOC_SP_INT_ARRAY(n, s, c, err, h)                                \
357
0
    do {                                                                       \
358
0
        if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) {                     \
359
0
            (err) = MP_VAL;                                                    \
360
0
        }                                                                      \
361
0
        if ((err) == MP_OKAY) {                                                \
362
0
            int n##ii;                                                         \
363
0
            (n)[0] = (sp_int*)n##d;                                            \
364
0
            ((sp_int_minimal*)(n)[0])->size = (sp_size_t)(s);                  \
365
0
            for (n##ii = 1; n##ii < (int)(c); n##ii++) {                       \
366
0
                (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s);                     \
367
0
                ((sp_int_minimal*)(n)[n##ii])->size = (sp_size_t)(s);          \
368
0
            }                                                                  \
369
0
        }                                                                      \
370
0
    }                                                                          \
371
0
    while (0)
372
#else
373
    /* Data declared on stack that supports multiple sp_ints of the
374
     * required size. Set into array and set sizes.
375
     */
376
    #define ALLOC_SP_INT_ARRAY(n, s, c, err, h)                                \
377
    do {                                                                       \
378
        if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) {                     \
379
            (err) = MP_VAL;                                                    \
380
        }                                                                      \
381
        if ((err) == MP_OKAY) {                                                \
382
            int n##ii;                                                         \
383
            for (n##ii = 0; n##ii < (int)(c); n##ii++) {                       \
384
                (n)[n##ii] = &n##d[n##ii];                                     \
385
                (n)[n##ii]->size = (sp_size_t)(s);                             \
386
            }                                                                  \
387
        }                                                                      \
388
    }                                                                          \
389
    while (0)
390
#endif
391
392
/* Free data variable that was dynamically allocated. */
393
0
#define FREE_DYN_SP_INT_ARRAY(n, h)             \
394
0
do {                                            \
395
0
    if (n##d != NULL) {                         \
396
0
        XFREE(n##d, h, DYNAMIC_TYPE_BIGINT);    \
397
0
    }                                           \
398
0
}                                               \
399
0
while (0)
400
401
/* FREE_SP_INT_ARRAY: Free an array of 'sp_int'. */
402
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
403
    !defined(WOLFSSL_SP_NO_MALLOC)
404
    #define FREE_SP_INT_ARRAY(n, h)                 \
405
        FREE_DYN_SP_INT_ARRAY(n, h)
406
#else
407
    /* Nothing to do as data declared on stack. */
408
0
    #define FREE_SP_INT_ARRAY(n, h) WC_DO_NOTHING
409
#endif
410
411
412
#ifndef WOLFSSL_NO_ASM
413
    #ifdef __IAR_SYSTEMS_ICC__
414
        #define __asm__        asm
415
        #define __volatile__   volatile
416
    #endif /* __IAR_SYSTEMS_ICC__ */
417
    #ifdef __KEIL__
418
        #define __asm__        __asm
419
        #define __volatile__   volatile
420
    #endif
421
422
    #if defined(WOLFSSL_SP_X86_64) && SP_WORD_SIZE == 64
423
/*
424
 * CPU: x86_64
425
 */
426
427
#ifndef _MSC_VER
428
/* Multiply va by vb and store double size result in: vh | vl */
429
#define SP_ASM_MUL(vl, vh, va, vb)                       \
430
0
    __asm__ __volatile__ (                               \
431
0
        "movq %[b], %%rax \n\t"                    \
432
0
        "mulq %[a]    \n\t"                    \
433
0
        "movq %%rax, %[l] \n\t"                    \
434
0
        "movq %%rdx, %[h] \n\t"                    \
435
0
        : [h] "+r" (vh), [l] "+r" (vl)                   \
436
0
        : [a] "rm" (va), [b] "rm" (vb)                   \
437
0
        : "%rax", "%rdx", "cc"                           \
438
0
    )
439
/* Multiply va by vb and store double size result in: vo | vh | vl */
440
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
441
0
    __asm__ __volatile__ (                               \
442
0
        "movq %[b], %%rax \n\t"                    \
443
0
        "mulq %[a]    \n\t"                    \
444
0
        "movq $0   , %[o] \n\t"                    \
445
0
        "movq %%rax, %[l] \n\t"                    \
446
0
        "movq %%rdx, %[h] \n\t"                    \
447
0
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
448
0
        : [a] "m" (va), [b] "m" (vb)                     \
449
0
        : "%rax", "%rdx", "cc"                           \
450
0
    )
451
/* Multiply va by vb and add double size result into: vo | vh | vl */
452
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
453
0
    __asm__ __volatile__ (                               \
454
0
        "movq %[b], %%rax \n\t"                    \
455
0
        "mulq %[a]    \n\t"                    \
456
0
        "addq %%rax, %[l] \n\t"                    \
457
0
        "adcq %%rdx, %[h] \n\t"                    \
458
0
        "adcq $0   , %[o] \n\t"                    \
459
0
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
460
0
        : [a] "rm" (va), [b] "rm" (vb)                   \
461
0
        : "%rax", "%rdx", "cc"                           \
462
0
    )
463
/* Multiply va by vb and add double size result into: vh | vl */
464
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
465
0
    __asm__ __volatile__ (                               \
466
0
        "movq %[b], %%rax \n\t"                    \
467
0
        "mulq %[a]    \n\t"                    \
468
0
        "addq %%rax, %[l] \n\t"                    \
469
0
        "adcq %%rdx, %[h] \n\t"                    \
470
0
        : [l] "+r" (vl), [h] "+r" (vh)                   \
471
0
        : [a] "rm" (va), [b] "rm" (vb)                   \
472
0
        : "%rax", "%rdx", "cc"                           \
473
0
    )
474
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
475
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
476
0
    __asm__ __volatile__ (                               \
477
0
        "movq %[b], %%rax \n\t"                    \
478
0
        "mulq %[a]    \n\t"                    \
479
0
        "addq %%rax, %[l] \n\t"                    \
480
0
        "adcq %%rdx, %[h] \n\t"                    \
481
0
        "adcq $0   , %[o] \n\t"                    \
482
0
        "addq %%rax, %[l] \n\t"                    \
483
0
        "adcq %%rdx, %[h] \n\t"                    \
484
0
        "adcq $0   , %[o] \n\t"                    \
485
0
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
486
0
        : [a] "rm" (va), [b] "rm" (vb)                   \
487
0
        : "%rax", "%rdx", "cc"                           \
488
0
    )
489
/* Multiply va by vb and add double size result twice into: vo | vh | vl
490
 * Assumes first add will not overflow vh | vl
491
 */
492
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
493
0
    __asm__ __volatile__ (                               \
494
0
        "movq %[b], %%rax \n\t"                    \
495
0
        "mulq %[a]    \n\t"                    \
496
0
        "addq %%rax, %[l] \n\t"                    \
497
0
        "adcq %%rdx, %[h] \n\t"                    \
498
0
        "addq %%rax, %[l] \n\t"                    \
499
0
        "adcq %%rdx, %[h] \n\t"                    \
500
0
        "adcq $0   , %[o] \n\t"                    \
501
0
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
502
0
        : [a] "rm" (va), [b] "rm" (vb)                   \
503
0
        : "%rax", "%rdx", "cc"                           \
504
0
    )
505
/* Square va and store double size result in: vh | vl */
506
#define SP_ASM_SQR(vl, vh, va)                           \
507
0
    __asm__ __volatile__ (                               \
508
0
        "movq %[a], %%rax \n\t"                    \
509
0
        "mulq %%rax   \n\t"                    \
510
0
        "movq %%rax, %[l] \n\t"                    \
511
0
        "movq %%rdx, %[h] \n\t"                    \
512
0
        : [h] "+r" (vh), [l] "+r" (vl)                   \
513
0
        : [a] "rm" (va)                                  \
514
0
        : "%rax", "%rdx", "cc"                           \
515
0
    )
516
/* Square va and add double size result into: vo | vh | vl */
517
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
518
0
    __asm__ __volatile__ (                               \
519
0
        "movq %[a], %%rax \n\t"                    \
520
0
        "mulq %%rax   \n\t"                    \
521
0
        "addq %%rax, %[l] \n\t"                    \
522
0
        "adcq %%rdx, %[h] \n\t"                    \
523
0
        "adcq $0   , %[o] \n\t"                    \
524
0
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
525
0
        : [a] "rm" (va)                                  \
526
0
        : "%rax", "%rdx", "cc"                           \
527
0
    )
528
/* Square va and add double size result into: vh | vl */
529
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
530
0
    __asm__ __volatile__ (                               \
531
0
        "movq %[a], %%rax \n\t"                    \
532
0
        "mulq %%rax   \n\t"                    \
533
0
        "addq %%rax, %[l] \n\t"                    \
534
0
        "adcq %%rdx, %[h] \n\t"                    \
535
0
        : [l] "+r" (vl), [h] "+r" (vh)                   \
536
0
        : [a] "rm" (va)                                  \
537
0
        : "%rax", "%rdx", "cc"                           \
538
0
    )
539
/* Add va into: vh | vl */
540
#define SP_ASM_ADDC(vl, vh, va)                          \
541
0
    __asm__ __volatile__ (                               \
542
0
        "addq %[a], %[l]  \n\t"                    \
543
0
        "adcq $0  , %[h]  \n\t"                    \
544
0
        : [l] "+r" (vl), [h] "+r" (vh)                   \
545
0
        : [a] "rm" (va)                                  \
546
0
        : "cc"                                           \
547
0
    )
548
#define SP_ASM_ADDC_REG(vl, vh, va)                      \
549
0
    __asm__ __volatile__ (                               \
550
0
        "addq %[a], %[l]  \n\t"                    \
551
0
        "adcq $0  , %[h]  \n\t"                    \
552
0
        : [l] "+r" (vl), [h] "+r" (vh)                   \
553
0
        : [a] "r" (va)                                   \
554
0
        : "cc"                                           \
555
0
    )
556
/* Sub va from: vh | vl */
557
#define SP_ASM_SUBB(vl, vh, va)                          \
558
0
    __asm__ __volatile__ (                               \
559
0
        "subq %[a], %[l]  \n\t"                    \
560
0
        "sbbq $0  , %[h]  \n\t"                    \
561
0
        : [l] "+r" (vl), [h] "+r" (vh)                   \
562
0
        : [a] "rm" (va)                                  \
563
0
        : "cc"                                           \
564
0
    )
565
/* Sub va (register operand only) from: vh | vl */
566
#define SP_ASM_SUBB_REG(vl, vh, va)                      \
567
0
    __asm__ __volatile__ (                               \
568
0
        "subq %[a], %[l]  \n\t"                    \
569
0
        "sbbq $0  , %[h]  \n\t"                    \
570
0
        : [l] "+r" (vl), [h] "+r" (vh)                   \
571
0
        : [a] "r" (va)                                   \
572
0
        : "cc"                                           \
573
0
    )
574
/* Add two times vc | vb | va into vo | vh | vl */
575
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
576
0
    __asm__ __volatile__ (                               \
577
0
        "addq %[a], %[l]  \n\t"                    \
578
0
        "adcq %[b], %[h]  \n\t"                    \
579
0
        "adcq %[c], %[o]  \n\t"                    \
580
0
        "addq %[a], %[l]  \n\t"                    \
581
0
        "adcq %[b], %[h]  \n\t"                    \
582
0
        "adcq %[c], %[o]  \n\t"                    \
583
0
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
584
0
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
585
0
        : "cc"                                           \
586
0
    )
587
/* Index of highest bit set. */
588
#define SP_ASM_HI_BIT_SET_IDX(va, vi)                    \
589
0
    __asm__ __volatile__ (                               \
590
0
        "bsr  %[a], %[i]  \n\t"                    \
591
0
        : [i] "=r" (vi)                                  \
592
0
        : [a] "r" (va)                                   \
593
0
        : "cc"                                           \
594
0
    )
595
#else
596
#include <intrin.h>
597
598
/* Multiply va by vb and store double size result in: vh | vl */
599
#define SP_ASM_MUL(vl, vh, va, vb)                       \
600
    vl = _umul128(va, vb, &vh)
601
602
/* Multiply va by vb and store double size result in: vo | vh | vl */
603
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
604
    do {                                                 \
605
        vl = _umul128(va, vb, &vh);                      \
606
        vo = 0;                                          \
607
    }                                                    \
608
    while (0)
609
610
/* Multiply va by vb and add double size result into: vo | vh | vl */
611
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
612
    do {                                                 \
613
        unsigned __int64 vtl, vth;                       \
614
        unsigned char c;                                 \
615
        vtl = _umul128(va, vb, &vth);                    \
616
        c = _addcarry_u64(0, vl, vtl, &vl);              \
617
        c = _addcarry_u64(c, vh, vth, &vh);              \
618
            _addcarry_u64(c, vo,   0, &vo);              \
619
    }                                                    \
620
    while (0)
621
622
/* Multiply va by vb and add double size result into: vh | vl */
623
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
624
    do {                                                 \
625
        unsigned __int64 vtl, vth;                       \
626
        unsigned char c;                                 \
627
        vtl = _umul128(va, vb, &vth);                    \
628
        c = _addcarry_u64(0, vl, vtl, &vl);              \
629
            _addcarry_u64(c, vh, vth, &vh);              \
630
    }                                                    \
631
    while (0)
632
633
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
634
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
635
    do {                                                 \
636
        unsigned __int64 vtl, vth;                       \
637
        unsigned char c;                                 \
638
        vtl = _umul128(va, vb, &vth);                    \
639
        c = _addcarry_u64(0, vl, vtl, &vl);              \
640
        c = _addcarry_u64(c, vh, vth, &vh);              \
641
            _addcarry_u64(c, vo,   0, &vo);              \
642
        c = _addcarry_u64(0, vl, vtl, &vl);              \
643
        c = _addcarry_u64(c, vh, vth, &vh);              \
644
            _addcarry_u64(c, vo,   0, &vo);              \
645
    }                                                    \
646
    while (0)
647
/* Multiply va by vb and add double size result twice into: vo | vh | vl
648
 * Assumes first add will not overflow vh | vl
649
 */
650
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
651
    do {                                                 \
652
        unsigned __int64 vtl, vth;                       \
653
        unsigned char c;                                 \
654
        vtl = _umul128(va, vb, &vth);                    \
655
        c = _addcarry_u64(0, vl, vtl, &vl);              \
656
            _addcarry_u64(c, vh, vth, &vh);              \
657
        c = _addcarry_u64(0, vl, vtl, &vl);              \
658
        c = _addcarry_u64(c, vh, vth, &vh);              \
659
            _addcarry_u64(c, vo,   0, &vo);              \
660
    }                                                    \
661
    while (0)
662
663
/* Square va and store double size result in: vh | vl */
664
#define SP_ASM_SQR(vl, vh, va)                           \
665
    vl = _umul128(va, va, &vh)
666
667
/* Square va and add double size result into: vo | vh | vl */
668
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
669
    do {                                                 \
670
        unsigned __int64 vtl, vth;                       \
671
        unsigned char c;                                 \
672
        vtl = _umul128(va, va, &vth);                    \
673
        c = _addcarry_u64(0, vl, vtl, &vl);              \
674
        c = _addcarry_u64(c, vh, vth, &vh);              \
675
            _addcarry_u64(c, vo,   0, &vo);              \
676
    }                                                    \
677
    while (0)
678
679
/* Square va and add double size result into: vh | vl */
680
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
681
    do {                                                 \
682
        unsigned __int64 vtl, vth;                       \
683
        unsigned char c;                                 \
684
        vtl = _umul128(va, va, &vth);                    \
685
        c = _addcarry_u64(0, vl, vtl, &vl);              \
686
            _addcarry_u64(c, vh, vth, &vh);              \
687
    }                                                    \
688
    while (0)
689
690
/* Add va into: vh | vl */
691
#define SP_ASM_ADDC(vl, vh, va)                          \
692
    do {                                                 \
693
        unsigned char c;                                 \
694
        c = _addcarry_u64(0, vl, va, &vl);               \
695
            _addcarry_u64(c, vh,  0, &vh);               \
696
    }                                                    \
697
    while (0)
698
699
/* Add va, variable in a register, into: vh | vl */
700
#define SP_ASM_ADDC_REG(vl, vh, va)                      \
701
    do {                                                 \
702
        unsigned char c;                                 \
703
        c = _addcarry_u64(0, vl, va, &vl);               \
704
            _addcarry_u64(c, vh,  0, &vh);               \
705
    }                                                    \
706
    while (0)
707
708
/* Sub va from: vh | vl */
709
#define SP_ASM_SUBB(vl, vh, va)                          \
710
    do {                                                 \
711
        unsigned char c;                                 \
712
        c = _subborrow_u64(0, vl, va, &vl);              \
713
            _subborrow_u64(c, vh,  0, &vh);              \
714
    }                                                    \
715
    while (0)
716
717
/* Add two times vc | vb | va into vo | vh | vl */
718
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
719
    do {                                                 \
720
        unsigned char c;                                 \
721
        c = _addcarry_u64(0, vl, va, &vl);               \
722
        c = _addcarry_u64(c, vh, vb, &vh);               \
723
            _addcarry_u64(c, vo, vc, &vo);               \
724
        c = _addcarry_u64(0, vl, va, &vl);               \
725
        c = _addcarry_u64(c, vh, vb, &vh);               \
726
            _addcarry_u64(c, vo, vc, &vo);               \
727
    }                                                    \
728
    while (0)
729
/* Index of highest bit set. */
730
#define SP_ASM_HI_BIT_SET_IDX(va, vi)                    \
731
    do {                                                 \
732
        unsigned long idx;                               \
733
        _BitScanReverse64(&idx, va);                     \
734
        vi = idx;                                        \
735
    }                                                    \
736
    while (0)
737
#endif
738
739
#if !defined(WOLFSSL_SP_DIV_WORD_HALF) && (!defined(_MSC_VER) || \
740
    _MSC_VER >= 1920)
741
/* Divide a two digit number (hi | lo) by a digit d and return the quotient.
742
 *
743
 * Using divq instruction on Intel x64; it faults (#DE) unless hi < d.
744
 *
745
 * @param [in] hi  SP integer digit. High digit of the dividend.
746
 * @param [in] lo  SP integer digit. Low digit of the dividend.
747
 * @param [in] d   SP integer digit. Number to divide by.
748
 * @return  The quotient; the remainder is discarded.
749
 */
750
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
751
                                          sp_int_digit d)
752
0
{
753
0
#ifndef _MSC_VER
754
0
    __asm__ __volatile__ (
755
0
        "divq %2"
756
0
        : "+a" (lo), "+d" (hi)  /* divq writes the remainder into rdx */
757
0
        : "r" (d)
758
0
        : "cc"
759
0
    );
760
0
    return lo;
761
#elif defined(_MSC_VER) && _MSC_VER >= 1920
762
    return _udiv128(hi, lo, d, NULL);
763
#endif
764
0
}
765
#define SP_ASM_DIV_WORD
766
#endif
767
768
#define SP_INT_ASM_AVAILABLE
769
770
    #endif /* WOLFSSL_SP_X86_64 && SP_WORD_SIZE == 64 */
771
772
    #if defined(WOLFSSL_SP_X86) && SP_WORD_SIZE == 32
773
/*
774
 * CPU: x86
775
 */
776
777
/* Multiply va by vb and store double size result in: vh | vl */
778
#define SP_ASM_MUL(vl, vh, va, vb)                       \
779
    __asm__ __volatile__ (                               \
780
        "movl %[b], %%eax \n\t"                    \
781
        "mull %[a]    \n\t"                    \
782
        "movl %%eax, %[l] \n\t"                    \
783
        "movl %%edx, %[h] \n\t"                    \
784
        : [h] "+r" (vh), [l] "+r" (vl)                   \
785
        : [a] "rm" (va), [b] "rm" (vb)                   \
786
        : "eax", "edx", "cc"                             \
787
    )
788
/* Multiply va by vb and store double size result in: vo | vh | vl */
789
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
790
    __asm__ __volatile__ (                               \
791
        "movl %[b], %%eax \n\t"                    \
792
        "mull %[a]    \n\t"                    \
793
        "movl $0   , %[o] \n\t"                    \
794
        "movl %%eax, %[l] \n\t"                    \
795
        "movl %%edx, %[h] \n\t"                    \
796
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
797
        : [a] "m" (va), [b] "m" (vb)                     \
798
        : "eax", "edx", "cc"                             \
799
    )
800
/* Multiply va by vb and add double size result into: vo | vh | vl */
801
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
802
    __asm__ __volatile__ (                               \
803
        "movl %[b], %%eax \n\t"                    \
804
        "mull %[a]    \n\t"                    \
805
        "addl %%eax, %[l] \n\t"                    \
806
        "adcl %%edx, %[h] \n\t"                    \
807
        "adcl $0   , %[o] \n\t"                    \
808
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
809
        : [a] "rm" (va), [b] "rm" (vb)                   \
810
        : "eax", "edx", "cc"                             \
811
    )
812
/* Multiply va by vb and add double size result into: vh | vl */
813
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
814
    __asm__ __volatile__ (                               \
815
        "movl %[b], %%eax \n\t"                    \
816
        "mull %[a]    \n\t"                    \
817
        "addl %%eax, %[l] \n\t"                    \
818
        "adcl %%edx, %[h] \n\t"                    \
819
        : [l] "+r" (vl), [h] "+r" (vh)                   \
820
        : [a] "rm" (va), [b] "rm" (vb)                   \
821
        : "eax", "edx", "cc"                             \
822
    )
823
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
824
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
825
    __asm__ __volatile__ (                               \
826
        "movl %[b], %%eax \n\t"                    \
827
        "mull %[a]    \n\t"                    \
828
        "addl %%eax, %[l] \n\t"                    \
829
        "adcl %%edx, %[h] \n\t"                    \
830
        "adcl $0   , %[o] \n\t"                    \
831
        "addl %%eax, %[l] \n\t"                    \
832
        "adcl %%edx, %[h] \n\t"                    \
833
        "adcl $0   , %[o] \n\t"                    \
834
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
835
        : [a] "rm" (va), [b] "rm" (vb)                   \
836
        : "eax", "edx", "cc"                             \
837
    )
838
/* Multiply va by vb and add double size result twice into: vo | vh | vl
839
 * Assumes first add will not overflow vh | vl
840
 */
841
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
842
    __asm__ __volatile__ (                               \
843
        "movl %[b], %%eax \n\t"                    \
844
        "mull %[a]    \n\t"                    \
845
        "addl %%eax, %[l] \n\t"                    \
846
        "adcl %%edx, %[h] \n\t"                    \
847
        "addl %%eax, %[l] \n\t"                    \
848
        "adcl %%edx, %[h] \n\t"                    \
849
        "adcl $0   , %[o] \n\t"                    \
850
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
851
        : [a] "rm" (va), [b] "rm" (vb)                   \
852
        : "eax", "edx", "cc"                             \
853
    )
854
/* Square va and store double size result in: vh | vl */
855
#define SP_ASM_SQR(vl, vh, va)                           \
856
    __asm__ __volatile__ (                               \
857
        "movl %[a], %%eax \n\t"                    \
858
        "mull %%eax   \n\t"                    \
859
        "movl %%eax, %[l] \n\t"                    \
860
        "movl %%edx, %[h] \n\t"                    \
861
        : [h] "+r" (vh), [l] "+r" (vl)                   \
862
        : [a] "rm" (va)                                  \
863
        : "eax", "edx", "cc"                             \
864
    )
865
/* Square va and add double size result into: vo | vh | vl */
866
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
867
    __asm__ __volatile__ (                               \
868
        "movl %[a], %%eax \n\t"                    \
869
        "mull %%eax   \n\t"                    \
870
        "addl %%eax, %[l] \n\t"                    \
871
        "adcl %%edx, %[h] \n\t"                    \
872
        "adcl $0   , %[o] \n\t"                    \
873
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
874
        : [a] "rm" (va)                                  \
875
        : "eax", "edx", "cc"                             \
876
    )
877
/* Square va and add double size result into: vh | vl */
878
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
879
    __asm__ __volatile__ (                               \
880
        "movl %[a], %%eax \n\t"                    \
881
        "mull %%eax   \n\t"                    \
882
        "addl %%eax, %[l] \n\t"                    \
883
        "adcl %%edx, %[h] \n\t"                    \
884
        : [l] "+r" (vl), [h] "+r" (vh)                   \
885
        : [a] "rm" (va)                                  \
886
        : "eax", "edx", "cc"                             \
887
    )
888
/* Add va into: vh | vl */
889
#define SP_ASM_ADDC(vl, vh, va)                          \
890
    __asm__ __volatile__ (                               \
891
        "addl %[a], %[l]  \n\t"                    \
892
        "adcl $0  , %[h]  \n\t"                    \
893
        : [l] "+r" (vl), [h] "+r" (vh)                   \
894
        : [a] "rm" (va)                                  \
895
        : "cc"                                           \
896
    )
897
#define SP_ASM_ADDC_REG(vl, vh, va)                      \
898
    __asm__ __volatile__ (                               \
899
        "addl %[a], %[l]  \n\t"                    \
900
        "adcl $0  , %[h]  \n\t"                    \
901
        : [l] "+r" (vl), [h] "+r" (vh)                   \
902
        : [a] "r" (va)                                   \
903
        : "cc"                                           \
904
    )
905
/* Sub va from: vh | vl */
906
#define SP_ASM_SUBB(vl, vh, va)                          \
907
    __asm__ __volatile__ (                               \
908
        "subl %[a], %[l]  \n\t"                    \
909
        "sbbl $0  , %[h]  \n\t"                    \
910
        : [l] "+r" (vl), [h] "+r" (vh)                   \
911
        : [a] "rm" (va)                                  \
912
        : "cc"                                           \
913
    )
914
/* Sub va, variable in a register, from: vh | vl */
915
#define SP_ASM_SUBB_REG(vl, vh, va)                      \
916
    __asm__ __volatile__ (                               \
917
        "subl %[a], %[l]  \n\t"                    \
918
        "sbbl $0  , %[h]  \n\t"                    \
919
        : [l] "+r" (vl), [h] "+r" (vh)                   \
920
        : [a] "r" (va)                                   \
921
        : "cc"                                           \
922
    )
923
/* Add two times vc | vb | va into vo | vh | vl */
924
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
925
    __asm__ __volatile__ (                               \
926
        "addl %[a], %[l]  \n\t"                    \
927
        "adcl %[b], %[h]  \n\t"                    \
928
        "adcl %[c], %[o]  \n\t"                    \
929
        "addl %[a], %[l]  \n\t"                    \
930
        "adcl %[b], %[h]  \n\t"                    \
931
        "adcl %[c], %[o]  \n\t"                    \
932
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
933
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
934
        : "cc"                                           \
935
    )
936
/* Index of highest bit set. */
937
#define SP_ASM_HI_BIT_SET_IDX(va, vi)                    \
938
    __asm__ __volatile__ (                               \
939
        "bsr  %[a], %[i]  \n\t"                    \
940
        : [i] "=r" (vi)                                  \
941
        : [a] "r" (va)                                   \
942
        : "cc"                                           \
943
    )
944
945
#ifndef WOLFSSL_SP_DIV_WORD_HALF
946
/* Divide a two digit number (hi | lo) by a digit d and return the quotient.
947
 *
948
 * Using divl instruction on Intel x86; it faults (#DE) unless hi < d.
949
 *
950
 * @param [in] hi  SP integer digit. High digit of the dividend.
951
 * @param [in] lo  SP integer digit. Low digit of the dividend.
952
 * @param [in] d   SP integer digit. Number to divide by.
953
 * @return  The quotient; the remainder is discarded.
954
 */
955
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
956
                                          sp_int_digit d)
957
{
958
    __asm__ __volatile__ (
959
        "divl %2"
960
        : "+a" (lo), "+d" (hi)  /* divl writes the remainder into edx */
961
        : "r" (d)
962
        : "cc"
963
    );
964
    return lo;
965
}
966
#define SP_ASM_DIV_WORD
967
#endif
968
969
#define SP_INT_ASM_AVAILABLE
970
971
    #endif /* WOLFSSL_SP_X86 && SP_WORD_SIZE == 32 */
972
973
    #if defined(WOLFSSL_SP_ARM64) && SP_WORD_SIZE == 64
974
/*
975
 * CPU: Aarch64
976
 */
977
978
/* Multiply va by vb and store double size result in: vh | vl */
979
#define SP_ASM_MUL(vl, vh, va, vb)                       \
980
    __asm__ __volatile__ (                               \
981
        "mul  %[l], %[a], %[b]  \n\t"            \
982
        "umulh  %[h], %[a], %[b]  \n\t"            \
983
        : [h] "+r" (vh), [l] "+r" (vl)                   \
984
        : [a] "r" (va), [b] "r" (vb)                     \
985
        : "cc"                                           \
986
    )
987
/* Multiply va by vb and store double size result in: vo | vh | vl */
988
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
989
    __asm__ __volatile__ (                               \
990
        "mul  x8, %[a], %[b]    \n\t"            \
991
        "umulh  %[h], %[a], %[b]  \n\t"            \
992
        "mov  %[l], x8    \n\t"            \
993
        "mov  %[o], xzr   \n\t"            \
994
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
995
        : [a] "r" (va), [b] "r" (vb)                     \
996
        : "x8", "cc"                                     \
997
    )
998
/* Multiply va by vb and add double size result into: vo | vh | vl */
999
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1000
    __asm__ __volatile__ (                               \
1001
        "mul  x8, %[a], %[b]    \n\t"            \
1002
        "umulh  x9, %[a], %[b]    \n\t"            \
1003
        "adds %[l], %[l], x8    \n\t"            \
1004
        "adcs %[h], %[h], x9    \n\t"            \
1005
        "adc  %[o], %[o], xzr   \n\t"            \
1006
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1007
        : [a] "r" (va), [b] "r" (vb)                     \
1008
        : "x8", "x9", "cc"                               \
1009
    )
1010
/* Multiply va by vb and add double size result into: vh | vl */
1011
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
1012
    __asm__ __volatile__ (                               \
1013
        "mul  x8, %[a], %[b]    \n\t"            \
1014
        "umulh  x9, %[a], %[b]    \n\t"            \
1015
        "adds %[l], %[l], x8    \n\t"            \
1016
        "adc  %[h], %[h], x9    \n\t"            \
1017
        : [l] "+r" (vl), [h] "+r" (vh)                   \
1018
        : [a] "r" (va), [b] "r" (vb)                     \
1019
        : "x8", "x9", "cc"                               \
1020
    )
1021
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
1022
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
1023
    __asm__ __volatile__ (                               \
1024
        "mul  x8, %[a], %[b]    \n\t"            \
1025
        "umulh  x9, %[a], %[b]    \n\t"            \
1026
        "adds %[l], %[l], x8    \n\t"            \
1027
        "adcs %[h], %[h], x9    \n\t"            \
1028
        "adc  %[o], %[o], xzr   \n\t"            \
1029
        "adds %[l], %[l], x8    \n\t"            \
1030
        "adcs %[h], %[h], x9    \n\t"            \
1031
        "adc  %[o], %[o], xzr   \n\t"            \
1032
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1033
        : [a] "r" (va), [b] "r" (vb)                     \
1034
        : "x8", "x9", "cc"                               \
1035
    )
1036
/* Multiply va by vb and add double size result twice into: vo | vh | vl
1037
 * Assumes first add will not overflow vh | vl
1038
 */
1039
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
1040
    __asm__ __volatile__ (                               \
1041
        "mul  x8, %[a], %[b]    \n\t"            \
1042
        "umulh  x9, %[a], %[b]    \n\t"            \
1043
        "adds %[l], %[l], x8    \n\t"            \
1044
        "adc  %[h], %[h], x9    \n\t"            \
1045
        "adds %[l], %[l], x8    \n\t"            \
1046
        "adcs %[h], %[h], x9    \n\t"            \
1047
        "adc  %[o], %[o], xzr   \n\t"            \
1048
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1049
        : [a] "r" (va), [b] "r" (vb)                     \
1050
        : "x8", "x9", "cc"                               \
1051
    )
1052
/* Square va and store double size result in: vh | vl */
1053
#define SP_ASM_SQR(vl, vh, va)                           \
1054
    __asm__ __volatile__ (                               \
1055
        "mul  %[l], %[a], %[a]  \n\t"            \
1056
        "umulh  %[h], %[a], %[a]  \n\t"            \
1057
        : [h] "+r" (vh), [l] "+r" (vl)                   \
1058
        : [a] "r" (va)                                   \
1059
        : "cc"                                           \
1060
    )
1061
/* Square va and add double size result into: vo | vh | vl */
1062
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
1063
    __asm__ __volatile__ (                               \
1064
        "mul  x8, %[a], %[a]    \n\t"            \
1065
        "umulh  x9, %[a], %[a]    \n\t"            \
1066
        "adds %[l], %[l], x8    \n\t"            \
1067
        "adcs %[h], %[h], x9    \n\t"            \
1068
        "adc  %[o], %[o], xzr   \n\t"            \
1069
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1070
        : [a] "r" (va)                                   \
1071
        : "x8", "x9", "cc"                               \
1072
    )
1073
/* Square va and add double size result into: vh | vl */
1074
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
1075
    __asm__ __volatile__ (                               \
1076
        "mul  x8, %[a], %[a]    \n\t"            \
1077
        "umulh  x9, %[a], %[a]    \n\t"            \
1078
        "adds %[l], %[l], x8    \n\t"            \
1079
        "adc  %[h], %[h], x9    \n\t"            \
1080
        : [l] "+r" (vl), [h] "+r" (vh)                   \
1081
        : [a] "r" (va)                                   \
1082
        : "x8", "x9", "cc"                               \
1083
    )
1084
/* Add va into: vh | vl */
1085
#define SP_ASM_ADDC(vl, vh, va)                          \
1086
    __asm__ __volatile__ (                               \
1087
        "adds %[l], %[l], %[a]  \n\t"            \
1088
        "adc  %[h], %[h], xzr   \n\t"            \
1089
        : [l] "+r" (vl), [h] "+r" (vh)                   \
1090
        : [a] "r" (va)                                   \
1091
        : "cc"                                           \
1092
    )
1093
/* Sub va from: vh | vl */
1094
#define SP_ASM_SUBB(vl, vh, va)                          \
1095
    __asm__ __volatile__ (                               \
1096
        "subs %[l], %[l], %[a]  \n\t"            \
1097
        "sbc  %[h], %[h], xzr   \n\t"            \
1098
        : [l] "+r" (vl), [h] "+r" (vh)                   \
1099
        : [a] "r" (va)                                   \
1100
        : "cc"                                           \
1101
    )
1102
/* Add two times vc | vb | va into vo | vh | vl */
1103
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
1104
    __asm__ __volatile__ (                               \
1105
        "adds %[l], %[l], %[a]  \n\t"            \
1106
        "adcs %[h], %[h], %[b]  \n\t"            \
1107
        "adc  %[o], %[o], %[c]  \n\t"            \
1108
        "adds %[l], %[l], %[a]  \n\t"            \
1109
        "adcs %[h], %[h], %[b]  \n\t"            \
1110
        "adc  %[o], %[o], %[c]  \n\t"            \
1111
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1112
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
1113
        : "cc"                                           \
1114
    )
1115
/* Count leading zeros. */
1116
#define SP_ASM_LZCNT(va, vn)                             \
1117
    __asm__ __volatile__ (                               \
1118
        "clz  %[n], %[a]  \n\t"                    \
1119
        : [n] "=r" (vn)                                  \
1120
        : [a] "r" (va)                                   \
1121
        :                                                \
1122
    )
1123
1124
#ifndef WOLFSSL_SP_DIV_WORD_HALF
1125
/* Divide a two digit number by a digit number and return. (hi | lo) / d
1126
 *
1127
 * Using udiv instruction on Aarch64.
1128
 * Constant time.
1129
 *
1130
 * @param [in] hi  SP integer digit. High digit of the dividend.
1131
 * @param [in] lo  SP integer digit. Low digit of the dividend.
1132
 * @param [in] d   SP integer digit. Number to divide by.
1133
 * @return  The division result.
1134
 */
1135
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
1136
                                          sp_int_digit d)
1137
{
1138
    __asm__ __volatile__ (
1139
        "lsr  x3, %[d], 48\n\t"
1140
        "mov  x5, 16\n\t"
1141
        "cmp  x3, 0\n\t"
1142
        "mov  x4, 63\n\t"
1143
        "csel x3, x5, xzr, eq\n\t"
1144
        "sub  x4, x4, x3\n\t"
1145
        "lsl  %[d], %[d], x3\n\t"
1146
        "lsl  %[hi], %[hi], x3\n\t"
1147
        "lsr  x5, %[lo], x4\n\t"
1148
        "lsl  %[lo], %[lo], x3\n\t"
1149
        "orr  %[hi], %[hi], x5, lsr 1\n\t"
1150
1151
        "lsr  x5, %[d], 32\n\t"
1152
        "add  x5, x5, 1\n\t"
1153
1154
        "udiv x3, %[hi], x5\n\t"
1155
        "lsl  x6, x3, 32\n\t"
1156
        "mul  x4, %[d], x6\n\t"
1157
        "umulh  x3, %[d], x6\n\t"
1158
        "subs %[lo], %[lo], x4\n\t"
1159
        "sbc  %[hi], %[hi], x3\n\t"
1160
1161
        "udiv x3, %[hi], x5\n\t"
1162
        "lsl  x3, x3, 32\n\t"
1163
        "add  x6, x6, x3\n\t"
1164
        "mul  x4, %[d], x3\n\t"
1165
        "umulh  x3, %[d], x3\n\t"
1166
        "subs %[lo], %[lo], x4\n\t"
1167
        "sbc  %[hi], %[hi], x3\n\t"
1168
1169
        "lsr  x3, %[lo], 32\n\t"
1170
        "orr  x3, x3, %[hi], lsl 32\n\t"
1171
1172
        "udiv x3, x3, x5\n\t"
1173
        "add  x6, x6, x3\n\t"
1174
        "mul  x4, %[d], x3\n\t"
1175
        "umulh  x3, %[d], x3\n\t"
1176
        "subs %[lo], %[lo], x4\n\t"
1177
        "sbc  %[hi], %[hi], x3\n\t"
1178
1179
        "lsr  x3, %[lo], 32\n\t"
1180
        "orr  x3, x3, %[hi], lsl 32\n\t"
1181
1182
        "udiv x3, x3, x5\n\t"
1183
        "add  x6, x6, x3\n\t"
1184
        "mul  x4, %[d], x3\n\t"
1185
        "sub  %[lo], %[lo], x4\n\t"
1186
1187
        "udiv x3, %[lo], %[d]\n\t"
1188
        "add  %[hi], x6, x3\n\t"
1189
1190
        : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
1191
        :
1192
        : "x3", "x4", "x5", "x6", "cc"
1193
    );
1194
1195
    return hi;
1196
}
1197
#define SP_ASM_DIV_WORD
1198
#endif
1199
1200
#define SP_INT_ASM_AVAILABLE
1201
1202
    #endif /* WOLFSSL_SP_ARM64 && SP_WORD_SIZE == 64 */
1203
1204
    #if (defined(WOLFSSL_SP_ARM32) || defined(WOLFSSL_SP_ARM_CORTEX_M)) && \
1205
        SP_WORD_SIZE == 32
1206
/*
1207
 * CPU: ARM32 or Cortex-M4 and similar
1208
 */
1209
1210
/* Multiply va by vb and store double size result in: vh | vl */
1211
#define SP_ASM_MUL(vl, vh, va, vb)                       \
1212
    __asm__ __volatile__ (                               \
1213
        "umull  %[l], %[h], %[a], %[b]  \n\t"            \
1214
        : [h] "+r" (vh), [l] "+r" (vl)                   \
1215
        : [a] "r" (va), [b] "r" (vb)                     \
1216
    )
1217
/* Multiply va by vb and store double size result in: vo | vh | vl */
1218
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
1219
    __asm__ __volatile__ (                               \
1220
        "umull  %[l], %[h], %[a], %[b]  \n\t"            \
1221
        "mov  %[o], #0    \n\t"            \
1222
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
1223
        : [a] "r" (va), [b] "r" (vb)                     \
1224
    )
1225
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is formed in scratch registers r9 | r8 and then added to
 * vh | vl, with the carry out of vh added into vo.
 * Clobbers r8, r9 and the condition flags.
 */
1226
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1227
    __asm__ __volatile__ (                               \
1228
        "umull  r8, r9, %[a], %[b]  \n\t"            \
1229
        "adds %[l], %[l], r8    \n\t"            \
1230
        "adcs %[h], %[h], r9    \n\t"            \
1231
        "adc  %[o], %[o], #0    \n\t"            \
1232
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1233
        : [a] "r" (va), [b] "r" (vb)                     \
1234
        : "r8", "r9", "cc"                               \
1235
    )
1236
/* Multiply va by vb and add double size result into: vh | vl
 *
 * A single UMLAL accumulates the product into vh | vl. There is no third
 * word, so any carry out of vh is not recorded.
 */
1237
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
1238
    __asm__ __volatile__ (                               \
1239
        "umlal  %[l], %[h], %[a], %[b]  \n\t"            \
1240
        : [l] "+r" (vl), [h] "+r" (vh)                   \
1241
        : [a] "r" (va), [b] "r" (vb)                     \
1242
    )
1243
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into scratch registers r9 | r8 and the same
 * add-with-carry sequence is then executed twice, each time adding the carry
 * out of vh into vo.
 * Clobbers r8, r9 and the condition flags.
 */
1244
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
1245
    __asm__ __volatile__ (                               \
1246
        "umull  r8, r9, %[a], %[b]  \n\t"            \
1247
        "adds %[l], %[l], r8    \n\t"            \
1248
        "adcs %[h], %[h], r9    \n\t"            \
1249
        "adc  %[o], %[o], #0    \n\t"            \
1250
        "adds %[l], %[l], r8    \n\t"            \
1251
        "adcs %[h], %[h], r9    \n\t"            \
1252
        "adc  %[o], %[o], #0    \n\t"            \
1253
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1254
        : [a] "r" (va), [b] "r" (vb)                     \
1255
        : "r8", "r9", "cc"                               \
1256
    )
1257
/* Multiply va by vb and add double size result twice into: vo | vh | vl
1258
 * Assumes first add will not overflow vh | vl
1259
 *
 * The product is computed once into scratch registers r9 | r8. The first
 * addition ends with a plain ADC on vh, so its carry out is discarded; only
 * the second addition adds the carry out of vh into vo.
 * Clobbers r8, r9 and the condition flags.
 */
1260
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
1261
    __asm__ __volatile__ (                               \
1262
        "umull  r8, r9, %[a], %[b]  \n\t"            \
1263
        "adds %[l], %[l], r8    \n\t"            \
1264
        "adc  %[h], %[h], r9    \n\t"            \
1265
        "adds %[l], %[l], r8    \n\t"            \
1266
        "adcs %[h], %[h], r9    \n\t"            \
1267
        "adc  %[o], %[o], #0    \n\t"            \
1268
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1269
        : [a] "r" (va), [b] "r" (vb)                     \
1270
        : "r8", "r9", "cc"                               \
1271
    )
1272
/* Square va and store double size result in: vh | vl
 *
 * A single UMULL with va as both multiplicands; the low word goes to vl and
 * the high word to vh. No clobbers are declared.
 */
1273
#define SP_ASM_SQR(vl, vh, va)                           \
1274
    __asm__ __volatile__ (                               \
1275
        "umull  %[l], %[h], %[a], %[a]  \n\t"            \
1276
        : [h] "+r" (vh), [l] "+r" (vl)                   \
1277
        : [a] "r" (va)                                   \
1278
    )
1279
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is formed in scratch registers r9 | r8 and then added to
 * vh | vl, with the carry out of vh added into vo.
 * Clobbers r8, r9 and the condition flags.
 */
1280
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
1281
    __asm__ __volatile__ (                               \
1282
        "umull  r8, r9, %[a], %[a]  \n\t"            \
1283
        "adds %[l], %[l], r8    \n\t"            \
1284
        "adcs %[h], %[h], r9    \n\t"            \
1285
        "adc  %[o], %[o], #0    \n\t"            \
1286
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1287
        : [a] "r" (va)                                   \
1288
        : "r8", "r9", "cc"                               \
1289
    )
1290
/* Square va and add double size result into: vh | vl
 *
 * A single UMLAL accumulates va * va into vh | vl. There is no third word,
 * so any carry out of vh is not recorded.
 * NOTE(review): "cc" is listed as clobbered here although the only
 * instruction has no flag-setting suffix, and SP_ASM_MUL_ADD_NO declares no
 * clobbers for the same instruction - presumably just conservative.
 */
1291
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
1292
    __asm__ __volatile__ (                               \
1293
        "umlal  %[l], %[h], %[a], %[a]  \n\t"            \
1294
        : [l] "+r" (vl), [h] "+r" (vh)                   \
1295
        : [a] "r" (va)                                   \
1296
        : "cc"                                           \
1297
    )
1298
/* Add va into: vh | vl
 *
 * va is added to vl and the resulting carry is added to vh. A carry out of
 * vh is not recorded. Clobbers the condition flags.
 */
1299
#define SP_ASM_ADDC(vl, vh, va)                          \
1300
    __asm__ __volatile__ (                               \
1301
        "adds %[l], %[l], %[a]  \n\t"            \
1302
        "adc  %[h], %[h], #0    \n\t"            \
1303
        : [l] "+r" (vl), [h] "+r" (vh)                   \
1304
        : [a] "r" (va)                                   \
1305
        : "cc"                                           \
1306
    )
1307
/* Sub va from: vh | vl
 *
 * va is subtracted from vl and the resulting borrow is subtracted from vh.
 * A borrow out of vh is not recorded. Clobbers the condition flags.
 */
1308
#define SP_ASM_SUBB(vl, vh, va)                          \
1309
    __asm__ __volatile__ (                               \
1310
        "subs %[l], %[l], %[a]  \n\t"            \
1311
        "sbc  %[h], %[h], #0    \n\t"            \
1312
        : [l] "+r" (vl), [h] "+r" (vh)                   \
1313
        : [a] "r" (va)                                   \
1314
        : "cc"                                           \
1315
    )
1316
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word add-with-carry chain (va into vl, vb into vh, vc into vo)
 * is executed twice rather than doubling the input first. A carry out of vo
 * is not recorded. Clobbers the condition flags.
 */
1317
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
1318
    __asm__ __volatile__ (                               \
1319
        "adds %[l], %[l], %[a]  \n\t"            \
1320
        "adcs %[h], %[h], %[b]  \n\t"            \
1321
        "adc  %[o], %[o], %[c]  \n\t"            \
1322
        "adds %[l], %[l], %[a]  \n\t"            \
1323
        "adcs %[h], %[h], %[b]  \n\t"            \
1324
        "adc  %[o], %[o], %[c]  \n\t"            \
1325
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1326
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
1327
        : "cc"                                           \
1328
    )
1329
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 7)
1330
/* Count leading zeros - instruction only available on ARMv7 and newer.
 *
 * Executes CLZ on va and writes the result to vn (write-only operand).
 * Only defined when WOLFSSL_ARM_ARCH is defined and is at least 7.
 */
1331
#define SP_ASM_LZCNT(va, vn)                             \
1332
    __asm__ __volatile__ (                               \
1333
        "clz  %[n], %[a]  \n\t"                    \
1334
        : [n] "=r" (vn)                                  \
1335
        : [a] "r" (va)                                   \
1336
    )
1337
#endif
1338
1339
#ifndef WOLFSSL_SP_DIV_WORD_HALF
1340
#ifndef WOLFSSL_SP_ARM32_UDIV
1341
/* Divide a two digit number by a digit number and return. (hi | lo) / d
1342
 *
1343
 * No division instruction used - does operation bit by bit.
1344
 * Constant time.
 *
 * Outline of the assembly:
 *   1. Normalize: shift d left by its leading zero count and shift hi | lo
 *      left by the same amount.
 *   2. Build a quotient estimate one bit at a time by comparing a running
 *      remainder against (d >> 1) + 1 and conditionally subtracting it using
 *      an all-ones/zero mask instead of a branch.
 *   3. Correct the estimate: twice multiply it by d, subtract from hi | lo
 *      and add the high word of the difference to the estimate; finally add
 *      one if the low word of the difference is still >= d.
 * When WOLFSSL_ARM_ARCH is defined and less than 7, the leading zero count is
 * obtained with a multiply by debruijn32_mul and a lookup in debruijn32
 * instead of the CLZ instruction.
 * NOTE(review): presumably d must be non-zero and hi < d so that the
 * quotient fits in one digit - confirm against callers.
1345
 *
1346
 * @param [in] hi  SP integer digit. High digit of the dividend.
1347
 * @param [in] lo  SP integer digit. Low digit of the dividend.
1348
 * @param [in] d   SP integer digit. Number to divide by.
1349
 * @return  The division result.
1350
 */
1351
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
1352
                                          sp_int_digit d)
1353
{
1354
    sp_int_digit r = 0;
1355
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
1356
    static const char debruijn32[32] = {
1357
        0, 31, 9, 30, 3, 8, 13, 29, 2, 5, 7, 21, 12, 24, 28, 19,
1358
        1, 10, 4, 14, 6, 22, 25, 20, 11, 15, 23, 26, 16, 27, 17, 18
1359
    };
1360
    static const sp_uint32 debruijn32_mul = 0x076be629;
1361
#endif
1362
1363
    __asm__ __volatile__ (
1364
        /* Shift d so that top bit is set. */
1365
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        /* Smear the top set bit of d downwards, add one, multiply by the
         * constant and use the top 5 bits of the product as a table index.
         * The loaded byte is used as the shift count in r5. */
1366
        "ldr  r4, %[m]\n\t"
1367
        "mov  r5, %[d]\n\t"
1368
        "orr  r5, r5, r5, lsr #1\n\t"
1369
        "orr  r5, r5, r5, lsr #2\n\t"
1370
        "orr  r5, r5, r5, lsr #4\n\t"
1371
        "orr  r5, r5, r5, lsr #8\n\t"
1372
        "orr  r5, r5, r5, lsr #16\n\t"
1373
        "add  r5, r5, #1\n\t"
1374
        "mul  r6, r5, r4\n\t"
1375
        "lsr  r5, r6, #27\n\t"
1376
        "ldrb r5, [%[t], r5]\n\t"
1377
#else
1378
        "clz  r5, %[d]\n\t"
1379
#endif
        /* r5 = shift count, r6 = 31 - shift count. The bits of lo that move
         * into hi are obtained by shifting right by r6 and then by one more,
         * so a shift count of zero moves no bits. */
1380
        "rsb  r6, r5, #31\n\t"
1381
        "lsl  %[d], %[d], r5\n\t"
1382
        "lsl  %[hi], %[hi], r5\n\t"
1383
        "lsr  r9, %[lo], r6\n\t"
1384
        "lsl  %[lo], %[lo], r5\n\t"
1385
        "orr  %[hi], %[hi], r9, lsr #1\n\t"
1386
1387
        /* r5 = (d >> 1) + 1 is the value compared against in each step.
         * r9 | r6 is a working copy of hi | lo. */
        "lsr  r5, %[d], #1\n\t"
1388
        "add  r5, r5, #1\n\t"
1389
        "mov  r6, %[lo]\n\t"
1390
        "mov  r9, %[hi]\n\t"
1391
        /* Do top 32 */
        /* r8 becomes all ones when r9 > r5 and zero otherwise. It supplies
         * the new low bit of r and masks the conditional subtract of r5. */
1392
        "subs r8, r5, r9\n\t"
1393
        "sbc  r8, r8, r8\n\t"
1394
        "add  %[r], %[r], %[r]\n\t"
1395
        "sub  %[r], %[r], r8\n\t"
1396
        "and  r8, r8, r5\n\t"
1397
        "subs r9, r9, r8\n\t"
1398
        /* Next 30 bits */
        /* r4 counts from 29 down to 0. Each pass shifts the top bit of r6
         * into r9 and repeats the compare and conditional subtract. */
1399
        "mov  r4, #29\n\t"
1400
        "\n1:\n\t"
1401
        "movs r6, r6, lsl #1\n\t"
1402
        "adc  r9, r9, r9\n\t"
1403
        "subs r8, r5, r9\n\t"
1404
        "sbc  r8, r8, r8\n\t"
1405
        "add  %[r], %[r], %[r]\n\t"
1406
        "sub  %[r], %[r], r8\n\t"
1407
        "and  r8, r8, r5\n\t"
1408
        "subs r9, r9, r8\n\t"
1409
        "subs r4, r4, #1\n\t"
1410
        "bpl  1b\n\t"
1411
1412
        /* r = 2 * r + 1 */
        "add  %[r], %[r], %[r]\n\t"
1413
        "add  %[r], %[r], #1\n\t"
1414
1415
        /* Handle difference has hi word > 0. */
        /* Done twice: r5 | r4 = (hi | lo) - r * d, then r += r5. */
1416
        "umull  r4, r5, %[r], %[d]\n\t"
1417
        "subs r4, %[lo], r4\n\t"
1418
        "sbc  r5, %[hi], r5\n\t"
1419
        "add  %[r], %[r], r5\n\t"
1420
        "umull  r4, r5, %[r], %[d]\n\t"
1421
        "subs r4, %[lo], r4\n\t"
1422
        "sbc  r5, %[hi], r5\n\t"
1423
        "add  %[r], %[r], r5\n\t"
1424
1425
        /* Add 1 to result if bottom half of difference is >= d. */
        /* First mask adds one when d < r4, second adds one when d == r4. */
1426
        "mul  r4, %[r], %[d]\n\t"
1427
        "subs r4, %[lo], r4\n\t"
1428
        "subs r9, %[d], r4\n\t"
1429
        "sbc  r8, r8, r8\n\t"
1430
        "sub  %[r], %[r], r8\n\t"
1431
        "subs r9, r9, #1\n\t"
1432
        "sbc  r8, r8, r8\n\t"
1433
        "sub  %[r], %[r], r8\n\t"
1434
        : [r] "+r" (r), [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
1435
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
1436
        : [t] "r" (debruijn32), [m] "m" (debruijn32_mul)
1437
#else
1438
        :
1439
#endif
1440
        : "r4", "r5", "r6", "r8", "r9", "cc"
1441
    );
1442
1443
    return r;
1444
}
1445
#else
1446
/* Divide a two digit number by a digit number and return. (hi | lo) / d
1447
 *
1448
 * Using udiv instruction on ARM32.
1449
 * Constant time.
 * NOTE(review): there are no data-dependent branches, but whether the whole
 * routine is constant time depends on UDIV having data-independent latency
 * on the target core - confirm against the core's reference manual.
 *
 * Outline of the assembly:
 *   1. If the top byte of d is zero, shift d and hi | lo left by 8 bits.
 *   2. Using (d >> 16) + 1 as the divisor, produce four partial quotients
 *      with UDIV (the first two scaled by 2^16), accumulating them in r6 and
 *      subtracting d * partial from hi | lo after each one.
 *   3. Divide the remaining low word by d itself and add that to r6.
 * The result is returned in the register that held hi.
1450
 *
1451
 * @param [in] hi  SP integer digit. High digit of the dividend.
1452
 * @param [in] lo  SP integer digit. Low digit of the dividend.
1453
 * @param [in] d   SP integer digit. Number to divide by.
1454
 * @return  The division result.
1455
 */
1456
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
1457
                                          sp_int_digit d)
1458
{
1459
    __asm__ __volatile__ (
        /* r3 = 8 when (d >> 24) == 0, otherwise 0. */
1460
        "lsrs r3, %[d], #24\n\t"
1461
  "it eq\n\t"
1462
        "moveq  r3, #8\n\t"
1463
  "it ne\n\t"
1464
        "movne  r3, #0\n\t"
        /* Shift d and hi | lo left by r3 bits. The bits of lo that move into
         * hi are obtained by shifting right by 31 - r3 and then by one. */
1465
        "rsb  r4, r3, #31\n\t"
1466
        "lsl  %[d], %[d], r3\n\t"
1467
        "lsl  %[hi], %[hi], r3\n\t"
1468
        "lsr  r5, %[lo], r4\n\t"
1469
        "lsl  %[lo], %[lo], r3\n\t"
1470
        "orr  %[hi], %[hi], r5, lsr #1\n\t"
1471
1472
        /* r5 = (d >> 16) + 1: divisor used for the quotient estimates. */
        "lsr  r5, %[d], 16\n\t"
1473
        "add  r5, r5, 1\n\t"
1474
1475
        /* First estimate from hi, scaled by 2^16; r6 starts the quotient. */
        "udiv r3, %[hi], r5\n\t"
1476
        "lsl  r6, r3, 16\n\t"
1477
        "umull  r4, r3, %[d], r6\n\t"
1478
        "subs %[lo], %[lo], r4\n\t"
1479
        "sbc  %[hi], %[hi], r3\n\t"
1480
1481
        /* Second estimate from the updated hi, also scaled by 2^16. */
        "udiv r3, %[hi], r5\n\t"
1482
        "lsl  r3, r3, 16\n\t"
1483
        "add  r6, r6, r3\n\t"
1484
        "umull  r4, r3, %[d], r3\n\t"
1485
        "subs %[lo], %[lo], r4\n\t"
1486
        "sbc  %[hi], %[hi], r3\n\t"
1487
1488
        /* Third estimate from the middle 32 bits of hi | lo. */
        "lsr  r3, %[lo], 16\n\t"
1489
        "orr  r3, r3, %[hi], lsl 16\n\t"
1490
1491
        "udiv r3, r3, r5\n\t"
1492
        "add  r6, r6, r3\n\t"
1493
        "umull  r4, r3, %[d], r3\n\t"
1494
        "subs %[lo], %[lo], r4\n\t"
1495
        "sbc  %[hi], %[hi], r3\n\t"
1496
1497
        /* Fourth estimate; only the low word of the difference is kept. */
        "lsr  r3, %[lo], 16\n\t"
1498
        "orr  r3, r3, %[hi], lsl 16\n\t"
1499
1500
        "udiv r3, r3, r5\n\t"
1501
        "add  r6, r6, r3\n\t"
1502
        "mul  r4, %[d], r3\n\t"
1503
        "sub  %[lo], %[lo], r4\n\t"
1504
1505
        /* Final step divides the remaining low word by d itself. */
        "udiv r3, %[lo], %[d]\n\t"
1506
        "add  %[hi], r6, r3\n\t"
1507
1508
        : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
1509
        :
1510
        : "r3", "r4", "r5", "r6", "cc"
1511
    );
1512
1513
    return hi;
1514
}
1515
#endif
1516
1517
#define SP_ASM_DIV_WORD
1518
#endif
1519
1520
#define SP_INT_ASM_AVAILABLE
1521
1522
    #endif /* (WOLFSSL_SP_ARM32 || ARM_CORTEX_M) && SP_WORD_SIZE == 32 */
1523
1524
    #if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
1525
/*
1526
 * CPU: ARM Thumb (like Cortex-M0)
1527
 */
1528
1529
/* Compile with -fomit-frame-pointer, or similar, if compiler complains about
1530
 * usage of register 'r7'.
1531
 */
1532
1533
#if defined(__clang__)
1534
1535
/* Multiply va by vb and store double size result in: vh | vl
 *
 * The 64-bit product is assembled from four 16x16-bit partial products
 * (al*bl, al*bh, ah*bh, ah*bl) computed with MULS. The two cross products
 * are split at bit 16 and added into vl and vh with carry.
 * All operands use the "l" register constraint. r4, r5 and r6 are scratch
 * (r5 holds zero for the carry add) and the condition flags are clobbered.
 */
1536
#define SP_ASM_MUL(vl, vh, va, vb)                       \
1537
    __asm__ __volatile__ (                               \
1538
        /* al * bl */                                    \
1539
        "uxth r6, %[a]    \n\t"            \
1540
        "uxth %[l], %[b]    \n\t"            \
1541
        "muls %[l], r6    \n\t"            \
1542
        /* al * bh */                                    \
1543
        "lsrs r4, %[b], #16   \n\t"            \
1544
        "muls r6, r4      \n\t"            \
1545
        "lsrs %[h], r6, #16   \n\t"            \
1546
        "lsls r6, r6, #16   \n\t"            \
1547
        "adds %[l], %[l], r6    \n\t"            \
1548
        "movs r5, #0      \n\t"            \
1549
        "adcs %[h], r5    \n\t"            \
1550
        /* ah * bh */                                    \
1551
        "lsrs r6, %[a], #16   \n\t"            \
1552
        "muls r4, r6      \n\t"            \
1553
        "adds %[h], %[h], r4    \n\t"            \
1554
        /* ah * bl */                                    \
1555
        "uxth r4, %[b]    \n\t"            \
1556
        "muls r6, r4      \n\t"            \
1557
        "lsrs r4, r6, #16   \n\t"            \
1558
        "lsls r6, r6, #16   \n\t"            \
1559
        "adds %[l], %[l], r6    \n\t"            \
1560
        "adcs %[h], r4    \n\t"            \
1561
        : [h] "+l" (vh), [l] "+l" (vl)                   \
1562
        : [a] "l" (va), [b] "l" (vb)                     \
1563
        : "r4", "r5", "r6", "cc"                         \
1564
    )
1565
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * Same four 16x16-bit partial product scheme as the two-word multiply, but
 * vo is loaded with zero and used as the zero operand for the carry add, so
 * it is left as zero on exit. vo is declared read-write ("+l") although it
 * is written before it is read.
 * r5 and r6 are scratch and the condition flags are clobbered.
 */
1566
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
1567
    __asm__ __volatile__ (                               \
1568
        /* al * bl */                                    \
1569
        "uxth r6, %[a]    \n\t"            \
1570
        "uxth %[l], %[b]    \n\t"            \
1571
        "muls %[l], r6    \n\t"            \
1572
        /* al * bh */                                    \
1573
        "lsrs r5, %[b], #16   \n\t"            \
1574
        "muls r6, r5      \n\t"            \
1575
        "lsrs %[h], r6, #16   \n\t"            \
1576
        "lsls r6, r6, #16   \n\t"            \
1577
        "adds %[l], %[l], r6    \n\t"            \
1578
        "movs %[o], #0    \n\t"            \
1579
        "adcs %[h], %[o]    \n\t"            \
1580
        /* ah * bh */                                    \
1581
        "lsrs r6, %[a], #16   \n\t"            \
1582
        "muls r5, r6      \n\t"            \
1583
        "adds %[h], %[h], r5    \n\t"            \
1584
        /* ah * bl */                                    \
1585
        "uxth r5, %[b]    \n\t"            \
1586
        "muls r6, r5      \n\t"            \
1587
        "lsrs r5, r6, #16   \n\t"            \
1588
        "lsls r6, r6, #16   \n\t"            \
1589
        "adds %[l], %[l], r6    \n\t"            \
1590
        "adcs %[h], r5    \n\t"            \
1591
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1592
        : [a] "l" (va), [b] "l" (vb)                     \
1593
        : "r5", "r6", "cc"                               \
1594
    )
1595
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
1596
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant used when neither WOLFSSL_SP_SMALL nor DEBUG is defined.
 * Each of the four 16x16-bit partial products is added into vo | vh | vl as
 * soon as it is computed, with carries propagated up to vo.
 * r5 holds zero for the whole sequence; r6 and r7 are scratch. The condition
 * flags are clobbered.
 */
1597
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1598
    __asm__ __volatile__ (                               \
1599
        /* al * bl */                                    \
1600
        "uxth r6, %[a]    \n\t"            \
1601
        "uxth r7, %[b]    \n\t"            \
1602
        "muls r7, r6      \n\t"            \
1603
        "adds %[l], %[l], r7    \n\t"            \
1604
        "movs r5, #0      \n\t"            \
1605
        "adcs %[h], r5    \n\t"            \
1606
        "adcs %[o], r5    \n\t"            \
1607
        /* al * bh */                                    \
1608
        "lsrs r7, %[b], #16   \n\t"            \
1609
        "muls r6, r7      \n\t"            \
1610
        "lsrs r7, r6, #16   \n\t"            \
1611
        "lsls r6, r6, #16   \n\t"            \
1612
        "adds %[l], %[l], r6    \n\t"            \
1613
        "adcs %[h], r7    \n\t"            \
1614
        "adcs %[o], r5    \n\t"            \
1615
        /* ah * bh */                                    \
1616
        "lsrs r6, %[a], #16   \n\t"            \
1617
        "lsrs r7, %[b], #16   \n\t"            \
1618
        "muls r7, r6      \n\t"            \
1619
        "adds %[h], %[h], r7    \n\t"            \
1620
        "adcs %[o], r5    \n\t"            \
1621
        /* ah * bl */                                    \
1622
        "uxth r7, %[b]    \n\t"            \
1623
        "muls r6, r7      \n\t"            \
1624
        "lsrs r7, r6, #16   \n\t"            \
1625
        "lsls r6, r6, #16   \n\t"            \
1626
        "adds %[l], %[l], r6    \n\t"            \
1627
        "adcs %[h], r7    \n\t"            \
1628
        "adcs %[o], r5    \n\t"            \
1629
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1630
        : [a] "l" (va), [b] "l" (vb)                     \
1631
        : "r5", "r6", "r7", "cc"                         \
1632
    )
1633
#else
1634
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant used when WOLFSSL_SP_SMALL or DEBUG is defined. It does not use
 * r7: r5 serves both as product scratch and as the zero operand, so it is
 * reloaded with zero before each carry add into vo.
 * NOTE(review): the reload sits between two add-with-carry instructions, so
 * this relies on "movs r5, #0" leaving the carry flag unchanged - confirm
 * against the instruction set reference.
 * r5 and r6 are scratch and the condition flags are clobbered.
 */
1635
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1636
    __asm__ __volatile__ (                               \
1637
        /* al * bl */                                    \
1638
        "uxth r6, %[a]    \n\t"            \
1639
        "uxth r5, %[b]    \n\t"            \
1640
        "muls r5, r6      \n\t"            \
1641
        "adds %[l], %[l], r5    \n\t"            \
1642
        "movs r5, #0      \n\t"            \
1643
        "adcs %[h], r5    \n\t"            \
1644
        "adcs %[o], r5    \n\t"            \
1645
        /* al * bh */                                    \
1646
        "lsrs r5, %[b], #16   \n\t"            \
1647
        "muls r6, r5      \n\t"            \
1648
        "lsrs r5, r6, #16   \n\t"            \
1649
        "lsls r6, r6, #16   \n\t"            \
1650
        "adds %[l], %[l], r6    \n\t"            \
1651
        "adcs %[h], r5    \n\t"            \
1652
        "movs r5, #0      \n\t"            \
1653
        "adcs %[o], r5    \n\t"            \
1654
        /* ah * bh */                                    \
1655
        "lsrs r6, %[a], #16   \n\t"            \
1656
        "lsrs r5, %[b], #16   \n\t"            \
1657
        "muls r5, r6      \n\t"            \
1658
        "adds %[h], %[h], r5    \n\t"            \
1659
        "movs r5, #0      \n\t"            \
1660
        "adcs %[o], r5    \n\t"            \
1661
        /* ah * bl */                                    \
1662
        "uxth r5, %[b]    \n\t"            \
1663
        "muls r6, r5      \n\t"            \
1664
        "lsrs r5, r6, #16   \n\t"            \
1665
        "lsls r6, r6, #16   \n\t"            \
1666
        "adds %[l], %[l], r6    \n\t"            \
1667
        "adcs %[h], r5    \n\t"            \
1668
        "movs r5, #0      \n\t"            \
1669
        "adcs %[o], r5    \n\t"            \
1670
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1671
        : [a] "l" (va), [b] "l" (vb)                     \
1672
        : "r5", "r6", "cc"                               \
1673
    )
1674
#endif
1675
/* Multiply va by vb and add double size result into: vh | vl
 *
 * Each of the four 16x16-bit partial products is added into vh | vl as it is
 * computed. There is no third word, so a carry out of vh is not recorded.
 * r4 and r6 are scratch, r5 holds zero for the first carry add, and the
 * condition flags are clobbered.
 */
1676
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
1677
    __asm__ __volatile__ (                               \
1678
        /* al * bl */                                    \
1679
        "uxth r6, %[a]    \n\t"            \
1680
        "uxth r4, %[b]    \n\t"            \
1681
        "muls r4, r6      \n\t"            \
1682
        "adds %[l], %[l], r4    \n\t"            \
1683
        "movs r5, #0      \n\t"            \
1684
        "adcs %[h], r5    \n\t"            \
1685
        /* al * bh */                                    \
1686
        "lsrs r4, %[b], #16   \n\t"            \
1687
        "muls r6, r4      \n\t"            \
1688
        "lsrs r4, r6, #16   \n\t"            \
1689
        "lsls r6, r6, #16   \n\t"            \
1690
        "adds %[l], %[l], r6    \n\t"            \
1691
        "adcs %[h], r4    \n\t"            \
1692
        /* ah * bh */                                    \
1693
        "lsrs r6, %[a], #16   \n\t"            \
1694
        "lsrs r4, %[b], #16   \n\t"            \
1695
        "muls r4, r6      \n\t"            \
1696
        "adds %[h], %[h], r4    \n\t"            \
1697
        /* ah * bl */                                    \
1698
        "uxth r4, %[b]    \n\t"            \
1699
        "muls r6, r4      \n\t"            \
1700
        "lsrs r4, r6, #16   \n\t"            \
1701
        "lsls r6, r6, #16   \n\t"            \
1702
        "adds %[l], %[l], r6    \n\t"            \
1703
        "adcs %[h], r4    \n\t"            \
1704
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1705
        : [a] "l" (va), [b] "l" (vb)                     \
1706
        : "r4", "r5", "r6", "cc"                         \
1707
    )
1708
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
1709
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant used when neither WOLFSSL_SP_SMALL nor DEBUG is defined.
 * Each of the four 16x16-bit partial products is computed once and then
 * added into vo | vh | vl two times, with carries propagated up to vo after
 * every addition.
 * r5 holds zero for the whole sequence; r6 and r7 are scratch. The condition
 * flags are clobbered.
 */
1710
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
1711
    __asm__ __volatile__ (                               \
1712
        /* al * bl */                                    \
1713
        "uxth r6, %[a]    \n\t"            \
1714
        "uxth r7, %[b]    \n\t"            \
1715
        "muls r7, r6      \n\t"            \
1716
        "adds %[l], %[l], r7    \n\t"            \
1717
        "movs r5, #0      \n\t"            \
1718
        "adcs %[h], r5    \n\t"            \
1719
        "adcs %[o], r5    \n\t"            \
1720
        "adds %[l], %[l], r7    \n\t"            \
1721
        "adcs %[h], r5    \n\t"            \
1722
        "adcs %[o], r5    \n\t"            \
1723
        /* al * bh */                                    \
1724
        "lsrs r7, %[b], #16   \n\t"            \
1725
        "muls r6, r7      \n\t"            \
1726
        "lsrs r7, r6, #16   \n\t"            \
1727
        "lsls r6, r6, #16   \n\t"            \
1728
        "adds %[l], %[l], r6    \n\t"            \
1729
        "adcs %[h], r7    \n\t"            \
1730
        "adcs %[o], r5    \n\t"            \
1731
        "adds %[l], %[l], r6    \n\t"            \
1732
        "adcs %[h], r7    \n\t"            \
1733
        "adcs %[o], r5    \n\t"            \
1734
        /* ah * bh */                                    \
1735
        "lsrs r6, %[a], #16   \n\t"            \
1736
        "lsrs r7, %[b], #16   \n\t"            \
1737
        "muls r7, r6      \n\t"            \
1738
        "adds %[h], %[h], r7    \n\t"            \
1739
        "adcs %[o], r5    \n\t"            \
1740
        "adds %[h], %[h], r7    \n\t"            \
1741
        "adcs %[o], r5    \n\t"            \
1742
        /* ah * bl */                                    \
1743
        "uxth r7, %[b]    \n\t"            \
1744
        "muls r6, r7      \n\t"            \
1745
        "lsrs r7, r6, #16   \n\t"            \
1746
        "lsls r6, r6, #16   \n\t"            \
1747
        "adds %[l], %[l], r6    \n\t"            \
1748
        "adcs %[h], r7    \n\t"            \
1749
        "adcs %[o], r5    \n\t"            \
1750
        "adds %[l], %[l], r6    \n\t"            \
1751
        "adcs %[h], r7    \n\t"            \
1752
        "adcs %[o], r5    \n\t"            \
1753
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1754
        : [a] "l" (va), [b] "l" (vb)                     \
1755
        : "r5", "r6", "r7", "cc"                         \
1756
    )
1757
#else
1758
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant used when WOLFSSL_SP_SMALL or DEBUG is defined. It does not use
 * r7: the value of va is saved in r8 and the register holding %[a] is reused
 * as the zero operand for carry adds. %[a] is reloaded from r8 before the
 * high half of va is needed and again at the end.
 * NOTE(review): %[a] is declared as an input-only operand yet is written
 * inside the block. This is only safe if the compiler never gives %[b] (or
 * any other live value) the same register as %[a] - confirm.
 * r5, r6 and r8 are scratch and the condition flags are clobbered.
 */
1759
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
1760
    __asm__ __volatile__ (                               \
1761
        "movs r8, %[a]    \n\t"            \
1762
        /* al * bl */                                    \
1763
        "uxth r6, %[a]    \n\t"            \
1764
        "uxth r5, %[b]    \n\t"            \
1765
        "muls r5, r6      \n\t"            \
1766
        "adds %[l], %[l], r5    \n\t"            \
1767
        "movs %[a], #0    \n\t"            \
1768
        "adcs %[h], %[a]    \n\t"            \
1769
        "adcs %[o], %[a]    \n\t"            \
1770
        "adds %[l], %[l], r5    \n\t"            \
1771
        "adcs %[h], %[a]    \n\t"            \
1772
        "adcs %[o], %[a]    \n\t"            \
1773
        /* al * bh */                                    \
1774
        "lsrs r5, %[b], #16   \n\t"            \
1775
        "muls r6, r5      \n\t"            \
1776
        "lsrs r5, r6, #16   \n\t"            \
1777
        "lsls r6, r6, #16   \n\t"            \
1778
        "adds %[l], %[l], r6    \n\t"            \
1779
        "adcs %[h], r5    \n\t"            \
1780
        "adcs %[o], %[a]    \n\t"            \
1781
        "adds %[l], %[l], r6    \n\t"            \
1782
        "adcs %[h], r5    \n\t"            \
1783
        "adcs %[o], %[a]    \n\t"            \
1784
        /* ah * bh */                                    \
1785
        "movs %[a], r8    \n\t"            \
1786
        "lsrs r6, %[a], #16   \n\t"            \
1787
        "lsrs r5, %[b], #16   \n\t"            \
1788
        "muls r5, r6      \n\t"            \
1789
        "adds %[h], %[h], r5    \n\t"            \
1790
        "movs %[a], #0    \n\t"            \
1791
        "adcs %[o], %[a]    \n\t"            \
1792
        "adds %[h], %[h], r5    \n\t"            \
1793
        "adcs %[o], %[a]    \n\t"            \
1794
        /* ah * bl */                                    \
1795
        "uxth r5, %[b]    \n\t"            \
1796
        "muls r6, r5      \n\t"            \
1797
        "lsrs r5, r6, #16   \n\t"            \
1798
        "lsls r6, r6, #16   \n\t"            \
1799
        "adds %[l], %[l], r6    \n\t"            \
1800
        "adcs %[h], r5    \n\t"            \
1801
        "adcs %[o], %[a]    \n\t"            \
1802
        "adds %[l], %[l], r6    \n\t"            \
1803
        "adcs %[h], r5    \n\t"            \
1804
        "adcs %[o], %[a]    \n\t"            \
1805
        "movs %[a], r8    \n\t"            \
1806
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1807
        : [a] "l" (va), [b] "l" (vb)                     \
1808
        : "r5", "r6", "r8", "cc"                         \
1809
    )
1810
#endif
1811
#ifndef DEBUG
1812
/* Multiply va by vb and add double size result twice into: vo | vh | vl
1813
 * Assumes first add will not overflow vh | vl
1814
 *
 * Variant used when DEBUG is not defined.
 * Each 16x16-bit partial product is computed once and added twice. For
 * al * bl, and for the first addition of al * bh, the carry out of vh is not
 * added into vo; every later addition propagates its carry into vo.
 * r5 holds zero for the whole sequence; r6 and r7 are scratch. The condition
 * flags are clobbered.
 */
1815
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
1816
    __asm__ __volatile__ (                               \
1817
        /* al * bl */                                    \
1818
        "uxth r6, %[a]    \n\t"            \
1819
        "uxth r7, %[b]    \n\t"            \
1820
        "muls r7, r6      \n\t"            \
1821
        "adds %[l], %[l], r7    \n\t"            \
1822
        "movs r5, #0      \n\t"            \
1823
        "adcs %[h], r5    \n\t"            \
1824
        "adds %[l], %[l], r7    \n\t"            \
1825
        "adcs %[h], r5    \n\t"            \
1826
        /* al * bh */                                    \
1827
        "lsrs r7, %[b], #16   \n\t"            \
1828
        "muls r6, r7      \n\t"            \
1829
        "lsrs r7, r6, #16   \n\t"            \
1830
        "lsls r6, r6, #16   \n\t"            \
1831
        "adds %[l], %[l], r6    \n\t"            \
1832
        "adcs %[h], r7    \n\t"            \
1833
        "adds %[l], %[l], r6    \n\t"            \
1834
        "adcs %[h], r7    \n\t"            \
1835
        "adcs %[o], r5    \n\t"            \
1836
        /* ah * bh */                                    \
1837
        "lsrs r6, %[a], #16   \n\t"            \
1838
        "lsrs r7, %[b], #16   \n\t"            \
1839
        "muls r7, r6      \n\t"            \
1840
        "adds %[h], %[h], r7    \n\t"            \
1841
        "adcs %[o], r5    \n\t"            \
1842
        "adds %[h], %[h], r7    \n\t"            \
1843
        "adcs %[o], r5    \n\t"            \
1844
        /* ah * bl */                                    \
1845
        "uxth r7, %[b]    \n\t"            \
1846
        "muls r6, r7      \n\t"            \
1847
        "lsrs r7, r6, #16   \n\t"            \
1848
        "lsls r6, r6, #16   \n\t"            \
1849
        "adds %[l], %[l], r6    \n\t"            \
1850
        "adcs %[h], r7    \n\t"            \
1851
        "adcs %[o], r5    \n\t"            \
1852
        "adds %[l], %[l], r6    \n\t"            \
1853
        "adcs %[h], r7    \n\t"            \
1854
        "adcs %[o], r5    \n\t"            \
1855
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1856
        : [a] "l" (va), [b] "l" (vb)                     \
1857
        : "r5", "r6", "r7", "cc"                         \
1858
    )
1859
#else
1860
/* Multiply va by vb and add double size result twice into: vo | vh | vl
1861
 * Assumes first add will not overflow vh | vl
1862
 *
 * Variant used when DEBUG is defined. It does not use r7: the value of va is
 * saved in r8 and the register holding %[a] is reused as the zero operand
 * for carry adds. %[a] is reloaded from r8 before the high half of va is
 * needed and again at the end.
 * For al * bl, and for the first addition of al * bh, the carry out of vh is
 * not added into vo; every later addition propagates its carry into vo.
 * NOTE(review): %[a] is declared as an input-only operand yet is written
 * inside the block. This is only safe if the compiler never gives %[b] (or
 * any other live value) the same register as %[a] - confirm.
 * r5, r6 and r8 are scratch and the condition flags are clobbered.
 */
1863
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
1864
    __asm__ __volatile__ (                               \
1865
        "movs r8, %[a]    \n\t"            \
1866
        /* al * bl */                                    \
1867
        "uxth r5, %[a]    \n\t"            \
1868
        "uxth r6, %[b]    \n\t"            \
1869
        "muls r6, r5      \n\t"            \
1870
        "adds %[l], %[l], r6    \n\t"            \
1871
        "movs %[a], #0    \n\t"            \
1872
        "adcs %[h], %[a]    \n\t"            \
1873
        "adds %[l], %[l], r6    \n\t"            \
1874
        "adcs %[h], %[a]    \n\t"            \
1875
        /* al * bh */                                    \
1876
        "lsrs r6, %[b], #16   \n\t"            \
1877
        "muls r5, r6      \n\t"            \
1878
        "lsrs r6, r5, #16   \n\t"            \
1879
        "lsls r5, r5, #16   \n\t"            \
1880
        "adds %[l], %[l], r5    \n\t"            \
1881
        "adcs %[h], r6    \n\t"            \
1882
        "adds %[l], %[l], r5    \n\t"            \
1883
        "adcs %[h], r6    \n\t"            \
1884
        "adcs %[o], %[a]    \n\t"            \
1885
        /* ah * bh */                                    \
1886
        "movs %[a], r8    \n\t"            \
1887
        "lsrs r5, %[a], #16   \n\t"            \
1888
        "lsrs r6, %[b], #16   \n\t"            \
1889
        "muls r6, r5      \n\t"            \
1890
        "movs %[a], #0    \n\t"            \
1891
        "adds %[h], %[h], r6    \n\t"            \
1892
        "adcs %[o], %[a]    \n\t"            \
1893
        "adds %[h], %[h], r6    \n\t"            \
1894
        "adcs %[o], %[a]    \n\t"            \
1895
        /* ah * bl */                                    \
1896
        "uxth r6, %[b]    \n\t"            \
1897
        "muls r5, r6      \n\t"            \
1898
        "lsrs r6, r5, #16   \n\t"            \
1899
        "lsls r5, r5, #16   \n\t"            \
1900
        "adds %[l], %[l], r5    \n\t"            \
1901
        "adcs %[h], r6    \n\t"            \
1902
        "adcs %[o], %[a]    \n\t"            \
1903
        "adds %[l], %[l], r5    \n\t"            \
1904
        "adcs %[h], r6    \n\t"            \
1905
        "adcs %[o], %[a]    \n\t"            \
1906
        "movs %[a], r8    \n\t"            \
1907
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1908
        : [a] "l" (va), [b] "l" (vb)                     \
1909
        : "r5", "r6", "r8", "cc"                         \
1910
    )
1911
#endif
1912
/* Square va and store double size result in: vh | vl
 *
 * va is split into its low 16 bits (al, via uxth) and the bits above them
 * (ah, via a right shift by 16). al * al is placed in vl and ah * ah in vh.
 * The cross product al * ah is then added in doubled: shifted left by 17
 * into vl and shifted right by 15 into vh, with the carry out of vl taken
 * into vh.
 *
 * vl and vh are written before they are read. r5, r6 and the condition
 * flags are clobbered.
 */
1913
#define SP_ASM_SQR(vl, vh, va)                           \
1914
    __asm__ __volatile__ (                               \
1915
        "lsrs r5, %[a], #16   \n\t"            \
1916
        "uxth r6, %[a]    \n\t"            \
1917
        "mov  %[l], r6    \n\t"            \
1918
        "mov  %[h], r5    \n\t"            \
1919
        /* al * al */                                    \
1920
        "muls %[l], %[l]    \n\t"            \
1921
        /* ah * ah */                                    \
1922
        "muls %[h], %[h]    \n\t"            \
1923
        /* 2 * al * ah */                                \
1924
        "muls r6, r5      \n\t"            \
1925
        "lsrs r5, r6, #15   \n\t"            \
1926
        "lsls r6, r6, #17   \n\t"            \
1927
        "adds %[l], %[l], r6    \n\t"            \
1928
        "adcs %[h], r5    \n\t"            \
1929
        : [h] "+l" (vh), [l] "+l" (vl)                   \
1930
        : [a] "l" (va)                                   \
1931
        : "r5", "r6", "cc"                               \
1932
    )
1933
/* Square va and add double size result into: vo | vh | vl
 *
 * The squares of the low 16 bits of va (al) and of va shifted right by 16
 * (ah) are added to vl and vh respectively. al and ah are then recomputed
 * and their cross product is added in doubled: shifted left by 17 into vl
 * and right by 15 into vh. The carry out of vh is added into vo after each
 * of the two additions.
 *
 * r5 is set to zero and used as the zero addend for the carry into vo.
 * r4, r5, r6 and the condition flags are clobbered.
 */
1934
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
1935
    __asm__ __volatile__ (                               \
1936
        "lsrs r4, %[a], #16   \n\t"            \
1937
        "uxth r6, %[a]    \n\t"            \
1938
        /* al * al */                                    \
1939
        "muls r6, r6      \n\t"            \
1940
        /* ah * ah */                                    \
1941
        "muls r4, r4      \n\t"            \
1942
        "adds %[l], %[l], r6    \n\t"            \
1943
        "adcs %[h], r4    \n\t"            \
1944
        "movs r5, #0      \n\t"            \
1945
        "adcs %[o], r5    \n\t"            \
1946
        "lsrs r4, %[a], #16   \n\t"            \
1947
        "uxth r6, %[a]    \n\t"            \
1948
        /* 2 * al * ah */                                \
1949
        "muls r6, r4      \n\t"            \
1950
        "lsrs r4, r6, #15   \n\t"            \
1951
        "lsls r6, r6, #17   \n\t"            \
1952
        "adds %[l], %[l], r6    \n\t"            \
1953
        "adcs %[h], r4    \n\t"            \
1954
        "adcs %[o], r5    \n\t"            \
1955
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1956
        : [a] "l" (va)                                   \
1957
        : "r4", "r5", "r6", "cc"                         \
1958
    )
1959
/* Square va and add double size result into: vh | vl
 *
 * va is split into its low 16 bits (al, held in r6) and va shifted right by
 * 16 (ah, held in r5). al * al is added to vl and ah * ah, plus the carry out
 * of vl, is added to vh. al and ah are then recomputed and their cross
 * product is added in doubled: shifted left by 17 into vl and right by 15
 * into vh.
 *
 * The two halves must live in different scratch registers: r6 for al and the
 * low-word addend, r5 for ah and the high-word addend. No carry out of vh is
 * recorded.
 *
 * r5, r6 and the condition flags are clobbered.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
    __asm__ __volatile__ (                               \
        "lsrs r5, %[a], #16   \n\t"            \
        "uxth r6, %[a]    \n\t"            \
        /* al * al */                                    \
        "muls r6, r6      \n\t"            \
        /* ah * ah */                                    \
        "muls r5, r5      \n\t"            \
        "adds %[l], %[l], r6    \n\t"            \
        "adcs %[h], r5    \n\t"            \
        "lsrs r5, %[a], #16   \n\t"            \
        "uxth r6, %[a]    \n\t"            \
        /* 2 * al * ah */                                \
        "muls r6, r5      \n\t"            \
        "lsrs r5, r6, #15   \n\t"            \
        "lsls r6, r6, #17   \n\t"            \
        "adds %[l], %[l], r6    \n\t"            \
        "adcs %[h], r5    \n\t"            \
        : [l] "+l" (vl), [h] "+l" (vh)                   \
        : [a] "l" (va)                                   \
        : "r5", "r6", "cc"                               \
    )
1982
/* Add va into: vh | vl
 *
 * va is added to vl and the carry out of that addition is added to vh.
 * r5 is set to zero to serve as the zero addend for the carry step.
 * r5 and the condition flags are clobbered.
 */
1983
#define SP_ASM_ADDC(vl, vh, va)                          \
1984
    __asm__ __volatile__ (                               \
1985
        "adds %[l], %[l], %[a]  \n\t"            \
1986
        "movs r5, #0      \n\t"            \
1987
        "adcs %[h], r5    \n\t"            \
1988
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1989
        : [a] "l" (va)                                   \
1990
        : "r5", "cc"                                     \
1991
    )
1992
/* Sub va from: vh | vl
 *
 * va is subtracted from vl and the borrow from that subtraction is taken
 * from vh. r5 is set to zero to serve as the zero subtrahend for the borrow
 * step. r5 and the condition flags are clobbered.
 */
1993
#define SP_ASM_SUBB(vl, vh, va)                          \
1994
    __asm__ __volatile__ (                               \
1995
        "subs %[l], %[l], %[a]  \n\t"            \
1996
        "movs r5, #0      \n\t"            \
1997
        "sbcs %[h], r5    \n\t"            \
1998
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1999
        : [a] "l" (va)                                   \
2000
        : "r5", "cc"                                     \
2001
    )
2002
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The doubling is done by running the same three-word add-with-carry chain
 * (va into vl, vb into vh, vc into vo) twice in a row. No scratch registers
 * are used; only the condition flags are clobbered.
 */
2003
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
2004
    __asm__ __volatile__ (                               \
2005
        "adds %[l], %[l], %[a]  \n\t"            \
2006
        "adcs %[h], %[b]    \n\t"            \
2007
        "adcs %[o], %[c]    \n\t"            \
2008
        "adds %[l], %[l], %[a]  \n\t"            \
2009
        "adcs %[h], %[b]    \n\t"            \
2010
        "adcs %[o], %[c]    \n\t"            \
2011
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2012
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)       \
2013
        : "cc"                                           \
2014
    )
2015
2016
#elif defined(WOLFSSL_KEIL)
2017
2018
/* Multiply va by vb and store double size result in: vh | vl
 *
 * WOLFSSL_KEIL variant, written with three-operand instruction forms.
 * Each operand is split into its low 16 bits (uxth) and the value shifted
 * right by 16, and four partial products are combined:
 *   al * bl            -> written to vl
 *   al * bh, ah * bl   -> shifted left by 16 into vl, right by 16 into vh
 *   ah * bh            -> added to vh
 * Carries out of vl are added into vh.
 *
 * vl and vh are written before they are read. r4, r5 (zero addend), r6 and
 * the condition flags are clobbered.
 */
2019
#define SP_ASM_MUL(vl, vh, va, vb)                       \
2020
    __asm__ __volatile__ (                               \
2021
        /* al * bl */                                    \
2022
        "uxth r6, %[a]    \n\t"            \
2023
        "uxth %[l], %[b]    \n\t"            \
2024
        "muls %[l], r6, %[l]    \n\t"            \
2025
        /* al * bh */                                    \
2026
        "lsrs r4, %[b], #16   \n\t"            \
2027
        "muls r6, r4, r6    \n\t"            \
2028
        "lsrs %[h], r6, #16   \n\t"            \
2029
        "lsls r6, r6, #16   \n\t"            \
2030
        "adds %[l], %[l], r6    \n\t"            \
2031
        "movs r5, #0      \n\t"            \
2032
        "adcs %[h], %[h], r5    \n\t"            \
2033
        /* ah * bh */                                    \
2034
        "lsrs r6, %[a], #16   \n\t"            \
2035
        "muls r4, r6, r4    \n\t"            \
2036
        "adds %[h], %[h], r4    \n\t"            \
2037
        /* ah * bl */                                    \
2038
        "uxth r4, %[b]    \n\t"            \
2039
        "muls r6, r4, r6    \n\t"            \
2040
        "lsrs r4, r6, #16   \n\t"            \
2041
        "lsls r6, r6, #16   \n\t"            \
2042
        "adds %[l], %[l], r6    \n\t"            \
2043
        "adcs %[h], %[h], r4    \n\t"            \
2044
        : [h] "+l" (vh), [l] "+l" (vl)                   \
2045
        : [a] "l" (va), [b] "l" (vb)                     \
2046
        : "r4", "r5", "r6", "cc"                         \
2047
    )
2048
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * WOLFSSL_KEIL variant. The product is built in vh | vl from the four
 * 16-bit partial products (al * bl, al * bh, ah * bh, ah * bl), in the same
 * way as SP_ASM_MUL. vo is set to zero; its register doubles as the zero
 * addend when the carry out of vl is added into vh.
 *
 * r5, r6 and the condition flags are clobbered.
 */
2049
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
2050
    __asm__ __volatile__ (                               \
2051
        /* al * bl */                                    \
2052
        "uxth r6, %[a]    \n\t"            \
2053
        "uxth %[l], %[b]    \n\t"            \
2054
        "muls %[l], r6, %[l]    \n\t"            \
2055
        /* al * bh */                                    \
2056
        "lsrs r5, %[b], #16   \n\t"            \
2057
        "muls r6, r5, r6    \n\t"            \
2058
        "lsrs %[h], r6, #16   \n\t"            \
2059
        "lsls r6, r6, #16   \n\t"            \
2060
        "adds %[l], %[l], r6    \n\t"            \
2061
        "movs %[o], #0    \n\t"            \
2062
        "adcs %[h], %[h], %[o]  \n\t"            \
2063
        /* ah * bh */                                    \
2064
        "lsrs r6, %[a], #16   \n\t"            \
2065
        "muls r5, r6, r5    \n\t"            \
2066
        "adds %[h], %[h], r5    \n\t"            \
2067
        /* ah * bl */                                    \
2068
        "uxth r5, %[b]    \n\t"            \
2069
        "muls r6, r5, r6    \n\t"            \
2070
        "lsrs r5, r6, #16   \n\t"            \
2071
        "lsls r6, r6, #16   \n\t"            \
2072
        "adds %[l], %[l], r6    \n\t"            \
2073
        "adcs %[h], %[h], r5    \n\t"            \
2074
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2075
        : [a] "l" (va), [b] "l" (vb)                     \
2076
        : "r5", "r6", "cc"                               \
2077
    )
2078
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
2079
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * WOLFSSL_KEIL variant built when neither WOLFSSL_SP_SMALL nor DEBUG is
 * defined. The four 16-bit partial products (al * bl, al * bh, ah * bh,
 * ah * bl) are added into vh | vl one at a time, and after each addition the
 * carry is added into vo.
 *
 * r5 is set to zero once and kept as the zero addend for carry steps; r6
 * and r7 hold the operand halves and products. r5, r6, r7 and the condition
 * flags are clobbered.
 */
2080
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
2081
    __asm__ __volatile__ (                               \
2082
        /* al * bl */                                    \
2083
        "uxth r6, %[a]    \n\t"            \
2084
        "uxth r7, %[b]    \n\t"            \
2085
        "muls r7, r6, r7    \n\t"            \
2086
        "adds %[l], %[l], r7    \n\t"            \
2087
        "movs r5, #0      \n\t"            \
2088
        "adcs %[h], %[h], r5    \n\t"            \
2089
        "adcs %[o], %[o], r5    \n\t"            \
2090
        /* al * bh */                                    \
2091
        "lsrs r7, %[b], #16   \n\t"            \
2092
        "muls r6, r7, r6    \n\t"            \
2093
        "lsrs r7, r6, #16   \n\t"            \
2094
        "lsls r6, r6, #16   \n\t"            \
2095
        "adds %[l], %[l], r6    \n\t"            \
2096
        "adcs %[h], %[h], r7    \n\t"            \
2097
        "adcs %[o], %[o], r5    \n\t"            \
2098
        /* ah * bh */                                    \
2099
        "lsrs r6, %[a], #16   \n\t"            \
2100
        "lsrs r7, %[b], #16   \n\t"            \
2101
        "muls r7, r6, r7    \n\t"            \
2102
        "adds %[h], %[h], r7    \n\t"            \
2103
        "adcs %[o], %[o], r5    \n\t"            \
2104
        /* ah * bl */                                    \
2105
        "uxth r7, %[b]    \n\t"            \
2106
        "muls r6, r7, r6    \n\t"            \
2107
        "lsrs r7, r6, #16   \n\t"            \
2108
        "lsls r6, r6, #16   \n\t"            \
2109
        "adds %[l], %[l], r6    \n\t"            \
2110
        "adcs %[h], %[h], r7    \n\t"            \
2111
        "adcs %[o], %[o], r5    \n\t"            \
2112
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2113
        : [a] "l" (va), [b] "l" (vb)                     \
2114
        : "r5", "r6", "r7", "cc"                         \
2115
    )
2116
#else
2117
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * WOLFSSL_KEIL variant built when WOLFSSL_SP_SMALL or DEBUG is defined.
 * It performs the same four partial-product additions as the other
 * SP_ASM_MUL_ADD definition but uses only r5 and r6 as scratch registers.
 * Because r5 also holds operand halves and product words, it is set back to
 * zero before each carry addition into vo.
 * NOTE(review): presumably r7 is avoided because it may be reserved in such
 * builds - confirm.
 *
 * r5, r6 and the condition flags are clobbered.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
2118
    __asm__ __volatile__ (                               \
2119
        /* al * bl */                                    \
2120
        "uxth   r6, %[a]                \n\t"            \
2121
        "uxth   r5, %[b]                \n\t"            \
2122
        "muls   r5, r6, r5              \n\t"            \
2123
        "adds   %[l], %[l], r5          \n\t"            \
2124
        "movs   r5, #0                  \n\t"            \
2125
        "adcs   %[h], %[h], r5          \n\t"            \
2126
        "adcs   %[o], %[o], r5          \n\t"            \
2127
        /* al * bh */                                    \
2128
        "lsrs   r5, %[b], #16           \n\t"            \
2129
        "muls   r6, r5, r6              \n\t"            \
2130
        "lsrs   r5, r6, #16             \n\t"            \
2131
        "lsls   r6, r6, #16             \n\t"            \
2132
        "adds   %[l], %[l], r6          \n\t"            \
2133
        "adcs   %[h], %[h], r5          \n\t"            \
2134
        "movs   r5, #0                  \n\t"            \
2135
        "adcs   %[o], %[o], r5          \n\t"            \
2136
        /* ah * bh */                                    \
2137
        "lsrs   r6, %[a], #16           \n\t"            \
2138
        "lsrs   r5, %[b], #16           \n\t"            \
2139
        "muls   r5, r6, r5              \n\t"            \
2140
        "adds   %[h], %[h], r5          \n\t"            \
2141
        "movs   r5, #0                  \n\t"            \
2142
        "adcs   %[o], %[o], r5          \n\t"            \
2143
        /* ah * bl */                                    \
2144
        "uxth   r5, %[b]                \n\t"            \
2145
        "muls   r6, r5, r6              \n\t"            \
2146
        "lsrs   r5, r6, #16             \n\t"            \
2147
        "lsls   r6, r6, #16             \n\t"            \
2148
        "adds   %[l], %[l], r6          \n\t"            \
2149
        "adcs   %[h], %[h], r5          \n\t"            \
2150
        "movs   r5, #0                  \n\t"            \
2151
        "adcs   %[o], %[o], r5          \n\t"            \
2152
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2153
        : [a] "l" (va), [b] "l" (vb)                     \
2154
        : "r5", "r6", "cc"                               \
2155
    )
2156
#endif
2157
/* Multiply va by vb and add double size result into: vh | vl
 *
 * WOLFSSL_KEIL variant. The four 16-bit partial products (al * bl, al * bh,
 * ah * bh, ah * bl) are added into vh | vl with carries from vl taken into
 * vh. There is no third word: a carry out of vh is not recorded.
 *
 * r5 is the zero addend for the first carry step; r4 and r6 hold operand
 * halves and products. r4, r5, r6 and the condition flags are clobbered.
 */
2158
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
2159
    __asm__ __volatile__ (                               \
2160
        /* al * bl */                                    \
2161
        "uxth r6, %[a]    \n\t"            \
2162
        "uxth r4, %[b]    \n\t"            \
2163
        "muls r4, r6, r4    \n\t"            \
2164
        "adds %[l], %[l], r4    \n\t"            \
2165
        "movs r5, #0      \n\t"            \
2166
        "adcs %[h], %[h], r5    \n\t"            \
2167
        /* al * bh */                                    \
2168
        "lsrs r4, %[b], #16   \n\t"            \
2169
        "muls r6, r4, r6    \n\t"            \
2170
        "lsrs r4, r6, #16   \n\t"            \
2171
        "lsls r6, r6, #16   \n\t"            \
2172
        "adds %[l], %[l], r6    \n\t"            \
2173
        "adcs %[h], %[h], r4    \n\t"            \
2174
        /* ah * bh */                                    \
2175
        "lsrs r6, %[a], #16   \n\t"            \
2176
        "lsrs r4, %[b], #16   \n\t"            \
2177
        "muls r4, r6, r4    \n\t"            \
2178
        "adds %[h], %[h], r4    \n\t"            \
2179
        /* ah * bl */                                    \
2180
        "uxth r4, %[b]    \n\t"            \
2181
        "muls r6, r4, r6    \n\t"            \
2182
        "lsrs r4, r6, #16   \n\t"            \
2183
        "lsls r6, r6, #16   \n\t"            \
2184
        "adds %[l], %[l], r6    \n\t"            \
2185
        "adcs %[h], %[h], r4    \n\t"            \
2186
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2187
        : [a] "l" (va), [b] "l" (vb)                     \
2188
        : "r4", "r5", "r6", "cc"                         \
2189
    )
2190
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
2191
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * WOLFSSL_KEIL variant built when neither WOLFSSL_SP_SMALL nor DEBUG is
 * defined. Each of the four 16-bit partial products (al * bl, al * bh,
 * ah * bh, ah * bl) is computed once and then added into vo | vh | vl twice,
 * with the carry added into vo after every addition.
 *
 * r5 is set to zero once and kept as the zero addend; r6 and r7 hold operand
 * halves and products. r5, r6, r7 and the condition flags are clobbered.
 */
2192
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
2193
    __asm__ __volatile__ (                               \
2194
        /* al * bl */                                    \
2195
        "uxth r6, %[a]    \n\t"            \
2196
        "uxth r7, %[b]    \n\t"            \
2197
        "muls r7, r6, r7    \n\t"            \
2198
        "adds %[l], %[l], r7    \n\t"            \
2199
        "movs r5, #0      \n\t"            \
2200
        "adcs %[h], %[h], r5    \n\t"            \
2201
        "adcs %[o], %[o], r5    \n\t"            \
2202
        "adds %[l], %[l], r7    \n\t"            \
2203
        "adcs %[h], %[h], r5    \n\t"            \
2204
        "adcs %[o], %[o], r5    \n\t"            \
2205
        /* al * bh */                                    \
2206
        "lsrs r7, %[b], #16   \n\t"            \
2207
        "muls r6, r7, r6    \n\t"            \
2208
        "lsrs r7, r6, #16   \n\t"            \
2209
        "lsls r6, r6, #16   \n\t"            \
2210
        "adds %[l], %[l], r6    \n\t"            \
2211
        "adcs %[h], %[h], r7    \n\t"            \
2212
        "adcs %[o], %[o], r5    \n\t"            \
2213
        "adds %[l], %[l], r6    \n\t"            \
2214
        "adcs %[h], %[h], r7    \n\t"            \
2215
        "adcs %[o], %[o], r5    \n\t"            \
2216
        /* ah * bh */                                    \
2217
        "lsrs r6, %[a], #16   \n\t"            \
2218
        "lsrs r7, %[b], #16   \n\t"            \
2219
        "muls r7, r6, r7    \n\t"            \
2220
        "adds %[h], %[h], r7    \n\t"            \
2221
        "adcs %[o], %[o], r5    \n\t"            \
2222
        "adds %[h], %[h], r7    \n\t"            \
2223
        "adcs %[o], %[o], r5    \n\t"            \
2224
        /* ah * bl */                                    \
2225
        "uxth r7, %[b]    \n\t"            \
2226
        "muls r6, r7, r6    \n\t"            \
2227
        "lsrs r7, r6, #16   \n\t"            \
2228
        "lsls r6, r6, #16   \n\t"            \
2229
        "adds %[l], %[l], r6    \n\t"            \
2230
        "adcs %[h], %[h], r7    \n\t"            \
2231
        "adcs %[o], %[o], r5    \n\t"            \
2232
        "adds %[l], %[l], r6    \n\t"            \
2233
        "adcs %[h], %[h], r7    \n\t"            \
2234
        "adcs %[o], %[o], r5    \n\t"            \
2235
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2236
        : [a] "l" (va), [b] "l" (vb)                     \
2237
        : "r5", "r6", "r7", "cc"                         \
2238
    )
2239
#else
2240
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * WOLFSSL_KEIL variant built when WOLFSSL_SP_SMALL or DEBUG is defined.
 * Each of the four 16-bit partial products is computed once and added into
 * vo | vh | vl twice, with carries added into vo.
 *
 * Only r5 and r6 are used as low scratch registers. va is saved in r8 at the
 * start so that its register can be reused as the zero addend; it is
 * reloaded from r8 before the high half of va is needed and again at the
 * end, so the register holding va has its original value on exit.
 *
 * r5, r6, r8 and the condition flags are clobbered.
 */
2241
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
2242
    __asm__ __volatile__ (                               \
2243
        "movs r8, %[a]    \n\t"            \
2244
        /* al * bl */                                    \
2245
        "uxth r6, %[a]    \n\t"            \
2246
        "uxth r5, %[b]    \n\t"            \
2247
        "muls r5, r6, r5    \n\t"            \
2248
        "adds %[l], %[l], r5    \n\t"            \
2249
        "movs %[a], #0    \n\t"            \
2250
        "adcs %[h], %[h], %[a]  \n\t"            \
2251
        "adcs %[o], %[o], %[a]  \n\t"            \
2252
        "adds %[l], %[l], r5    \n\t"            \
2253
        "adcs %[h], %[h], %[a]  \n\t"            \
2254
        "adcs %[o], %[o], %[a]  \n\t"            \
2255
        /* al * bh */                                    \
2256
        "lsrs r5, %[b], #16   \n\t"            \
2257
        "muls r6, r5, r6    \n\t"            \
2258
        "lsrs r5, r6, #16   \n\t"            \
2259
        "lsls r6, r6, #16   \n\t"            \
2260
        "adds %[l], %[l], r6    \n\t"            \
2261
        "adcs %[h], %[h], r5    \n\t"            \
2262
        "adcs %[o], %[o], %[a]  \n\t"            \
2263
        "adds %[l], %[l], r6    \n\t"            \
2264
        "adcs %[h], %[h], r5    \n\t"            \
2265
        "adcs %[o], %[o], %[a]  \n\t"            \
2266
        /* ah * bh */                                    \
2267
        "movs %[a], r8    \n\t"            \
2268
        "lsrs r6, %[a], #16   \n\t"            \
2269
        "lsrs r5, %[b], #16   \n\t"            \
2270
        "muls r5, r6, r5    \n\t"            \
2271
        "adds %[h], %[h], r5    \n\t"            \
2272
        "movs %[a], #0    \n\t"            \
2273
        "adcs %[o], %[o], %[a]  \n\t"            \
2274
        "adds %[h], %[h], r5    \n\t"            \
2275
        "adcs %[o], %[o], %[a]  \n\t"            \
2276
        /* ah * bl */                                    \
2277
        "uxth r5, %[b]    \n\t"            \
2278
        "muls r6, r5, r6    \n\t"            \
2279
        "lsrs r5, r6, #16   \n\t"            \
2280
        "lsls r6, r6, #16   \n\t"            \
2281
        "adds %[l], %[l], r6    \n\t"            \
2282
        "adcs %[h], %[h], r5    \n\t"            \
2283
        "adcs %[o], %[o], %[a]  \n\t"            \
2284
        "adds %[l], %[l], r6    \n\t"            \
2285
        "adcs %[h], %[h], r5    \n\t"            \
2286
        "adcs %[o], %[o], %[a]  \n\t"            \
2287
        "movs %[a], r8    \n\t"            \
2288
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2289
        : [a] "l" (va), [b] "l" (vb)                     \
2290
        : "r5", "r6", "r8", "cc"                         \
2291
    )
2292
#endif
2293
#ifndef DEBUG
2294
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * WOLFSSL_KEIL variant built when DEBUG is not defined. Each of the four
 * 16-bit partial products is computed once and added twice. Unlike
 * SP_ASM_MUL_ADD2, no carry is taken into vo for the two al * bl additions
 * or for the first al * bh addition; vo receives carries from the second
 * al * bh addition onwards.
 *
 * r5 is set to zero once and kept as the zero addend; r6 and r7 hold operand
 * halves and products. r5, r6, r7 and the condition flags are clobbered.
 */
2297
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
2298
    __asm__ __volatile__ (                               \
2299
        /* al * bl */                                    \
2300
        "uxth r6, %[a]    \n\t"            \
2301
        "uxth r7, %[b]    \n\t"            \
2302
        "muls r7, r6, r7    \n\t"            \
2303
        "adds %[l], %[l], r7    \n\t"            \
2304
        "movs r5, #0      \n\t"            \
2305
        "adcs %[h], %[h], r5    \n\t"            \
2306
        "adds %[l], %[l], r7    \n\t"            \
2307
        "adcs %[h], %[h], r5    \n\t"            \
2308
        /* al * bh */                                    \
2309
        "lsrs r7, %[b], #16   \n\t"            \
2310
        "muls r6, r7, r6    \n\t"            \
2311
        "lsrs r7, r6, #16   \n\t"            \
2312
        "lsls r6, r6, #16   \n\t"            \
2313
        "adds %[l], %[l], r6    \n\t"            \
2314
        "adcs %[h], %[h], r7    \n\t"            \
2315
        "adds %[l], %[l], r6    \n\t"            \
2316
        "adcs %[h], %[h], r7    \n\t"            \
2317
        "adcs %[o], %[o], r5    \n\t"            \
2318
        /* ah * bh */                                    \
2319
        "lsrs r6, %[a], #16   \n\t"            \
2320
        "lsrs r7, %[b], #16   \n\t"            \
2321
        "muls r7, r6, r7    \n\t"            \
2322
        "adds %[h], %[h], r7    \n\t"            \
2323
        "adcs %[o], %[o], r5    \n\t"            \
2324
        "adds %[h], %[h], r7    \n\t"            \
2325
        "adcs %[o], %[o], r5    \n\t"            \
2326
        /* ah * bl */                                    \
2327
        "uxth r7, %[b]    \n\t"            \
2328
        "muls r6, r7, r6    \n\t"            \
2329
        "lsrs r7, r6, #16   \n\t"            \
2330
        "lsls r6, r6, #16   \n\t"            \
2331
        "adds %[l], %[l], r6    \n\t"            \
2332
        "adcs %[h], %[h], r7    \n\t"            \
2333
        "adcs %[o], %[o], r5    \n\t"            \
2334
        "adds %[l], %[l], r6    \n\t"            \
2335
        "adcs %[h], %[h], r7    \n\t"            \
2336
        "adcs %[o], %[o], r5    \n\t"            \
2337
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2338
        : [a] "l" (va), [b] "l" (vb)                     \
2339
        : "r5", "r6", "r7", "cc"                         \
2340
    )
2341
#else
2342
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * WOLFSSL_KEIL variant built when DEBUG is defined. Each of the four 16-bit
 * partial products is computed once and added twice. No carry is taken into
 * vo for the two al * bl additions or for the first al * bh addition; vo
 * receives carries from the second al * bh addition onwards.
 *
 * Only r5 and r6 are used as low scratch registers. va is saved in r8 at the
 * start so that its register can be reused as the zero addend; it is
 * reloaded from r8 before the high half of va is needed and again at the
 * end, so the register holding va has its original value on exit.
 *
 * r5, r6, r8 and the condition flags are clobbered.
 */
2345
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
2346
    __asm__ __volatile__ (                               \
2347
        "movs r8, %[a]    \n\t"            \
2348
        /* al * bl */                                    \
2349
        "uxth r5, %[a]    \n\t"            \
2350
        "uxth r6, %[b]    \n\t"            \
2351
        "muls r6, r5, r6    \n\t"            \
2352
        "adds %[l], %[l], r6    \n\t"            \
2353
        "movs %[a], #0    \n\t"            \
2354
        "adcs %[h], %[h], %[a]  \n\t"            \
2355
        "adds %[l], %[l], r6    \n\t"            \
2356
        "adcs %[h], %[h], %[a]  \n\t"            \
2357
        /* al * bh */                                    \
2358
        "lsrs r6, %[b], #16   \n\t"            \
2359
        "muls r5, r6, r5    \n\t"            \
2360
        "lsrs r6, r5, #16   \n\t"            \
2361
        "lsls r5, r5, #16   \n\t"            \
2362
        "adds %[l], %[l], r5    \n\t"            \
2363
        "adcs %[h], %[h], r6    \n\t"            \
2364
        "adds %[l], %[l], r5    \n\t"            \
2365
        "adcs %[h], %[h], r6    \n\t"            \
2366
        "adcs %[o], %[o], %[a]  \n\t"            \
2367
        /* ah * bh */                                    \
2368
        "movs %[a], r8    \n\t"            \
2369
        "lsrs r5, %[a], #16   \n\t"            \
2370
        "lsrs r6, %[b], #16   \n\t"            \
2371
        "muls r6, r5, r6    \n\t"            \
2372
        "movs %[a], #0    \n\t"            \
2373
        "adds %[h], %[h], r6    \n\t"            \
2374
        "adcs %[o], %[o], %[a]  \n\t"            \
2375
        "adds %[h], %[h], r6    \n\t"            \
2376
        "adcs %[o], %[o], %[a]  \n\t"            \
2377
        /* ah * bl */                                    \
2378
        "uxth r6, %[b]    \n\t"            \
2379
        "muls r5, r6, r5    \n\t"            \
2380
        "lsrs r6, r5, #16   \n\t"            \
2381
        "lsls r5, r5, #16   \n\t"            \
2382
        "adds %[l], %[l], r5    \n\t"            \
2383
        "adcs %[h], %[h], r6    \n\t"            \
2384
        "adcs %[o], %[o], %[a]  \n\t"            \
2385
        "adds %[l], %[l], r5    \n\t"            \
2386
        "adcs %[h], %[h], r6    \n\t"            \
2387
        "adcs %[o], %[o], %[a]  \n\t"            \
2388
        "movs %[a], r8    \n\t"            \
2389
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2390
        : [a] "l" (va), [b] "l" (vb)                     \
2391
        : "r5", "r6", "r8", "cc"                         \
2392
    )
2393
#endif
2394
/* Square va and store double size result in: vh | vl
 *
 * WOLFSSL_KEIL variant, written with three-operand instruction forms.
 * va is split into its low 16 bits (al, via uxth) and the bits above them
 * (ah, via a right shift by 16). al * al is placed in vl and ah * ah in vh.
 * The cross product al * ah is then added in doubled: shifted left by 17
 * into vl and shifted right by 15 into vh, with the carry out of vl taken
 * into vh.
 *
 * vl and vh are written before they are read. r5, r6 and the condition
 * flags are clobbered.
 */
2395
#define SP_ASM_SQR(vl, vh, va)                           \
2396
    __asm__ __volatile__ (                               \
2397
        "lsrs r5, %[a], #16   \n\t"            \
2398
        "uxth r6, %[a]    \n\t"            \
2399
        "mov  %[l], r6    \n\t"            \
2400
        "mov  %[h], r5    \n\t"            \
2401
        /* al * al */                                    \
2402
        "muls %[l], %[l], %[l]  \n\t"            \
2403
        /* ah * ah */                                    \
2404
        "muls %[h], %[h], %[h]  \n\t"            \
2405
        /* 2 * al * ah */                                \
2406
        "muls r6, r5, r6    \n\t"            \
2407
        "lsrs r5, r6, #15   \n\t"            \
2408
        "lsls r6, r6, #17   \n\t"            \
2409
        "adds %[l], %[l], r6    \n\t"            \
2410
        "adcs %[h], %[h], r5    \n\t"            \
2411
        : [h] "+l" (vh), [l] "+l" (vl)                   \
2412
        : [a] "l" (va)                                   \
2413
        : "r5", "r6", "cc"                               \
2414
    )
2415
/* Square va and add double size result into: vo | vh | vl
 *
 * WOLFSSL_KEIL variant. The squares of the low 16 bits of va (al) and of va
 * shifted right by 16 (ah) are added to vl and vh respectively. al and ah
 * are then recomputed and their cross product is added in doubled: shifted
 * left by 17 into vl and right by 15 into vh. The carry out of vh is added
 * into vo after each of the two additions.
 *
 * r5 is set to zero and used as the zero addend for the carry into vo.
 * r4, r5, r6 and the condition flags are clobbered.
 */
2416
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
2417
    __asm__ __volatile__ (                               \
2418
        "lsrs r4, %[a], #16   \n\t"            \
2419
        "uxth r6, %[a]    \n\t"            \
2420
        /* al * al */                                    \
2421
        "muls r6, r6, r6    \n\t"            \
2422
        /* ah * ah */                                    \
2423
        "muls r4, r4, r4    \n\t"            \
2424
        "adds %[l], %[l], r6    \n\t"            \
2425
        "adcs %[h], %[h], r4    \n\t"            \
2426
        "movs r5, #0      \n\t"            \
2427
        "adcs %[o], %[o], r5    \n\t"            \
2428
        "lsrs r4, %[a], #16   \n\t"            \
2429
        "uxth r6, %[a]    \n\t"            \
2430
        /* 2 * al * ah */                                \
2431
        "muls r6, r4, r6    \n\t"            \
2432
        "lsrs r4, r6, #15   \n\t"            \
2433
        "lsls r6, r6, #17   \n\t"            \
2434
        "adds %[l], %[l], r6    \n\t"            \
2435
        "adcs %[h], %[h], r4    \n\t"            \
2436
        "adcs %[o], %[o], r5    \n\t"            \
2437
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2438
        : [a] "l" (va)                                   \
2439
        : "r4", "r5", "r6", "cc"                         \
2440
    )
2441
/* Square va and add double size result into: vh | vl
 *
 * WOLFSSL_KEIL variant. va is split into its low 16 bits (al, in r6) and va
 * shifted right by 16 (ah, in r5). al * al is added to vl and ah * ah, plus
 * the carry out of vl, is added to vh. al and ah are then recomputed and
 * their cross product is added in doubled: shifted left by 17 into vl and
 * right by 15 into vh. No carry out of vh is recorded.
 *
 * r5, r6 and the condition flags are clobbered.
 */
2442
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
2443
    __asm__ __volatile__ (                               \
2444
        "lsrs r5, %[a], #16   \n\t"            \
2445
        "uxth r6, %[a]    \n\t"            \
2446
        /* al * al */                                    \
2447
        "muls r6, r6, r6    \n\t"            \
2448
        /* ah * ah */                                    \
2449
        "muls r5, r5, r5    \n\t"            \
2450
        "adds %[l], %[l], r6    \n\t"            \
2451
        "adcs %[h], %[h], r5    \n\t"            \
2452
        "lsrs r5, %[a], #16   \n\t"            \
2453
        "uxth r6, %[a]    \n\t"            \
2454
        /* 2 * al * ah */                                \
2455
        "muls r6, r5, r6    \n\t"            \
2456
        "lsrs r5, r6, #15   \n\t"            \
2457
        "lsls r6, r6, #17   \n\t"            \
2458
        "adds %[l], %[l], r6    \n\t"            \
2459
        "adcs %[h], %[h], r5    \n\t"            \
2460
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2461
        : [a] "l" (va)                                   \
2462
        : "r5", "r6", "cc"                               \
2463
    )
2464
/* Add va into: vh | vl
 *
 * WOLFSSL_KEIL variant. va is added to vl and the carry out of that addition
 * is added to vh. r5 is set to zero to serve as the zero addend for the
 * carry step. r5 and the condition flags are clobbered.
 */
2465
#define SP_ASM_ADDC(vl, vh, va)                          \
2466
    __asm__ __volatile__ (                               \
2467
        "adds %[l], %[l], %[a]  \n\t"            \
2468
        "movs r5, #0      \n\t"            \
2469
        "adcs %[h], %[h], r5    \n\t"            \
2470
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2471
        : [a] "l" (va)                                   \
2472
        : "r5", "cc"                                     \
2473
    )
2474
/* Sub va from: vh | vl
 *
 * WOLFSSL_KEIL variant. va is subtracted from vl and the borrow from that
 * subtraction is taken from vh. r5 is set to zero to serve as the zero
 * subtrahend for the borrow step. r5 and the condition flags are clobbered.
 */
2475
#define SP_ASM_SUBB(vl, vh, va)                          \
2476
    __asm__ __volatile__ (                               \
2477
        "subs %[l], %[l], %[a]  \n\t"            \
2478
        "movs r5, #0      \n\t"            \
2479
        "sbcs %[h], %[h], r5    \n\t"            \
2480
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2481
        : [a] "l" (va)                                   \
2482
        : "r5", "cc"                                     \
2483
    )
2484
/* Add two times vc | vb | va into vo | vh | vl
 *
 * WOLFSSL_KEIL variant. The doubling is done by running the same three-word
 * add-with-carry chain (va into vl, vb into vh, vc into vo) twice in a row.
 * No scratch registers are used; only the condition flags are clobbered.
 */
2485
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
2486
    __asm__ __volatile__ (                               \
2487
        "adds %[l], %[l], %[a]  \n\t"            \
2488
        "adcs %[h], %[h], %[b]  \n\t"            \
2489
        "adcs %[o], %[o], %[c]  \n\t"            \
2490
        "adds %[l], %[l], %[a]  \n\t"            \
2491
        "adcs %[h], %[h], %[b]  \n\t"            \
2492
        "adcs %[o], %[o], %[c]  \n\t"            \
2493
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2494
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)       \
2495
        : "cc"                                           \
2496
    )
2497
2498
#elif defined(__GNUC__)
2499
2500
/* Multiply va by vb and store double size result in: vh | vl
 *
 * Only a 32x32->32 multiply is used, so each operand is split into 16-bit
 * halves (al/ah, bl/bh) and the four partial products are combined:
 *   vl  = al * bl
 *   vh:vl += (al * bh) << 16
 *   vh += ah * bh
 *   vh:vl += (ah * bl) << 16
 * The cross products are split with a shift right by 16 (part for vh) and a
 * shift left by 16 (part for vl). The previous contents of vl and vh are
 * overwritten, not accumulated.
 *
 * vl, vh: written (declared read/write, low registers).
 * va, vb: read only (low registers).
 * Clobbers r4, r5 (used as constant zero), r6 and the condition flags.
 */
2501
#define SP_ASM_MUL(vl, vh, va, vb)                       \
2502
    __asm__ __volatile__ (                               \
2503
        /* al * bl */                                    \
2504
        "uxth r6, %[a]    \n\t"            \
2505
        "uxth %[l], %[b]    \n\t"            \
2506
        "mul  %[l], r6    \n\t"            \
2507
        /* al * bh */                                    \
2508
        "lsr  r4, %[b], #16   \n\t"            \
2509
        "mul  r6, r4      \n\t"            \
2510
        "lsr  %[h], r6, #16   \n\t"            \
2511
        "lsl  r6, r6, #16   \n\t"            \
2512
        "add  %[l], %[l], r6    \n\t"            \
2513
        "mov  r5, #0      \n\t"            \
2514
        "adc  %[h], r5    \n\t"            \
2515
        /* ah * bh */                                    \
2516
        "lsr  r6, %[a], #16   \n\t"            \
2517
        "mul  r4, r6      \n\t"            \
2518
        "add  %[h], %[h], r4    \n\t"            \
2519
        /* ah * bl */                                    \
2520
        "uxth r4, %[b]    \n\t"            \
2521
        "mul  r6, r4      \n\t"            \
2522
        "lsr  r4, r6, #16   \n\t"            \
2523
        "lsl  r6, r6, #16   \n\t"            \
2524
        "add  %[l], %[l], r6    \n\t"            \
2525
        "adc  %[h], r4    \n\t"            \
2526
        : [h] "+l" (vh), [l] "+l" (vl)                   \
2527
        : [a] "l" (va), [b] "l" (vb)                     \
2528
        : "r4", "r5", "r6", "cc"                         \
2529
    )
2530
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * Same 16-bit partial product scheme as a plain double word multiply:
 * al * bl seeds vl, the two cross products are split at bit 16 and added
 * into vh:vl, and ah * bh is added into vh.
 * vo is set to zero early on and doubles as the zero operand for the first
 * add-with-carry; nothing else is written to it, so vo is zero on exit.
 * The previous contents of vl, vh and vo are overwritten.
 *
 * vl, vh, vo: written (declared read/write, low registers).
 * va, vb:     read only (low registers).
 * Clobbers r5, r6 and the condition flags.
 */
2531
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
2532
    __asm__ __volatile__ (                               \
2533
        /* al * bl */                                    \
2534
        "uxth r6, %[a]    \n\t"            \
2535
        "uxth %[l], %[b]    \n\t"            \
2536
        "mul  %[l], r6    \n\t"            \
2537
        /* al * bh */                                    \
2538
        "lsr  r5, %[b], #16   \n\t"            \
2539
        "mul  r6, r5      \n\t"            \
2540
        "lsr  %[h], r6, #16   \n\t"            \
2541
        "lsl  r6, r6, #16   \n\t"            \
2542
        "add  %[l], %[l], r6    \n\t"            \
2543
        "mov  %[o], #0    \n\t"            \
2544
        "adc  %[h], %[o]    \n\t"            \
2545
        /* ah * bh */                                    \
2546
        "lsr  r6, %[a], #16   \n\t"            \
2547
        "mul  r5, r6      \n\t"            \
2548
        "add  %[h], %[h], r5    \n\t"            \
2549
        /* ah * bl */                                    \
2550
        "uxth r5, %[b]    \n\t"            \
2551
        "mul  r6, r5      \n\t"            \
2552
        "lsr  r5, r6, #16   \n\t"            \
2553
        "lsl  r6, r6, #16   \n\t"            \
2554
        "add  %[l], %[l], r6    \n\t"            \
2555
        "adc  %[h], r5    \n\t"            \
2556
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2557
        : [a] "l" (va), [b] "l" (vb)                     \
2558
        : "r5", "r6", "cc"                               \
2559
    )
2560
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
2561
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant used when neither WOLFSSL_SP_SMALL nor DEBUG is defined.
 * The operands are split into 16-bit halves and each of the four partial
 * products is added into the three word accumulator, with the carry
 * propagated up to vo after every addition.
 * r5 is set to zero once and kept as the zero operand for carry propagation;
 * r6 and r7 hold the halves and partial products.
 *
 * vl, vh, vo: read and written (low registers).
 * va, vb:     read only (low registers).
 * Clobbers r5, r6, r7 and the condition flags.
 */
2562
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
2563
    __asm__ __volatile__ (                               \
2564
        /* al * bl */                                    \
2565
        "uxth r6, %[a]    \n\t"            \
2566
        "uxth r7, %[b]    \n\t"            \
2567
        "mul  r7, r6      \n\t"            \
2568
        "add  %[l], %[l], r7    \n\t"            \
2569
        "mov  r5, #0      \n\t"            \
2570
        "adc  %[h], r5    \n\t"            \
2571
        "adc  %[o], r5    \n\t"            \
2572
        /* al * bh */                                    \
2573
        "lsr  r7, %[b], #16   \n\t"            \
2574
        "mul  r6, r7      \n\t"            \
2575
        "lsr  r7, r6, #16   \n\t"            \
2576
        "lsl  r6, r6, #16   \n\t"            \
2577
        "add  %[l], %[l], r6    \n\t"            \
2578
        "adc  %[h], r7    \n\t"            \
2579
        "adc  %[o], r5    \n\t"            \
2580
        /* ah * bh */                                    \
2581
        "lsr  r6, %[a], #16   \n\t"            \
2582
        "lsr  r7, %[b], #16   \n\t"            \
2583
        "mul  r7, r6      \n\t"            \
2584
        "add  %[h], %[h], r7    \n\t"            \
2585
        "adc  %[o], r5    \n\t"            \
2586
        /* ah * bl */                                    \
2587
        "uxth r7, %[b]    \n\t"            \
2588
        "mul  r6, r7      \n\t"            \
2589
        "lsr  r7, r6, #16   \n\t"            \
2590
        "lsl  r6, r6, #16   \n\t"            \
2591
        "add  %[l], %[l], r6    \n\t"            \
2592
        "adc  %[h], r7    \n\t"            \
2593
        "adc  %[o], r5    \n\t"            \
2594
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2595
        : [a] "l" (va), [b] "l" (vb)                     \
2596
        : "r5", "r6", "r7", "cc"                         \
2597
    )
2598
#else
2599
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant used when WOLFSSL_SP_SMALL or DEBUG is defined.
 * Computes the same sum of four 16-bit partial products as the other variant
 * but uses only r5 and r6 as scratch. As r5 also carries halves and partial
 * products, it is reloaded with zero before each carry propagation into vo.
 * NOTE(review): presumably r7 is avoided because it may be reserved (e.g. as
 * a frame pointer) in these builds - confirm.
 *
 * vl, vh, vo: read and written (low registers).
 * va, vb:     read only (low registers).
 * Clobbers r5, r6 and the condition flags.
 */
2600
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
2601
    __asm__ __volatile__ (                               \
2602
        /* al * bl */                                    \
2603
        "uxth   r6, %[a]                \n\t"            \
2604
        "uxth   r5, %[b]                \n\t"            \
2605
        "mul    r5, r6                  \n\t"            \
2606
        "add    %[l], %[l], r5          \n\t"            \
2607
        "mov    r5, #0                  \n\t"            \
2608
        "adc    %[h], r5                \n\t"            \
2609
        "adc    %[o], r5                \n\t"            \
2610
        /* al * bh */                                    \
2611
        "lsr    r5, %[b], #16           \n\t"            \
2612
        "mul    r6, r5                  \n\t"            \
2613
        "lsr    r5, r6, #16             \n\t"            \
2614
        "lsl    r6, r6, #16             \n\t"            \
2615
        "add    %[l], %[l], r6          \n\t"            \
2616
        "adc    %[h], r5                \n\t"            \
2617
        "mov    r5, #0                  \n\t"            \
2618
        "adc    %[o], r5                \n\t"            \
2619
        /* ah * bh */                                    \
2620
        "lsr    r6, %[a], #16           \n\t"            \
2621
        "lsr    r5, %[b], #16           \n\t"            \
2622
        "mul    r5, r6                  \n\t"            \
2623
        "add    %[h], %[h], r5          \n\t"            \
2624
        "mov    r5, #0                  \n\t"            \
2625
        "adc    %[o], r5                \n\t"            \
2626
        /* ah * bl */                                    \
2627
        "uxth   r5, %[b]                \n\t"            \
2628
        "mul    r6, r5                  \n\t"            \
2629
        "lsr    r5, r6, #16             \n\t"            \
2630
        "lsl    r6, r6, #16             \n\t"            \
2631
        "add    %[l], %[l], r6          \n\t"            \
2632
        "adc    %[h], r5                \n\t"            \
2633
        "mov    r5, #0                  \n\t"            \
2634
        "adc    %[o], r5                \n\t"            \
2635
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2636
        : [a] "l" (va), [b] "l" (vb)                     \
2637
        : "r5", "r6", "cc"                               \
2638
    )
2639
#endif
2640
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The operands are split into 16-bit halves and the four partial products
 * are added into the two word accumulator vh | vl. There is no third
 * accumulator word: any carry out of vh is dropped, so the caller must know
 * the sum fits in two words.
 * r5 is set to zero and used only as the zero operand when propagating the
 * carry of the first addition into vh.
 *
 * vl, vh: read and written (low registers).
 * va, vb: read only (low registers).
 * Clobbers r4, r5, r6 and the condition flags.
 */
2641
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
2642
    __asm__ __volatile__ (                               \
2643
        /* al * bl */                                    \
2644
        "uxth r6, %[a]    \n\t"            \
2645
        "uxth r4, %[b]    \n\t"            \
2646
        "mul  r4, r6      \n\t"            \
2647
        "add  %[l], %[l], r4    \n\t"            \
2648
        "mov  r5, #0      \n\t"            \
2649
        "adc  %[h], r5    \n\t"            \
2650
        /* al * bh */                                    \
2651
        "lsr  r4, %[b], #16   \n\t"            \
2652
        "mul  r6, r4      \n\t"            \
2653
        "lsr  r4, r6, #16   \n\t"            \
2654
        "lsl  r6, r6, #16   \n\t"            \
2655
        "add  %[l], %[l], r6    \n\t"            \
2656
        "adc  %[h], r4    \n\t"            \
2657
        /* ah * bh */                                    \
2658
        "lsr  r6, %[a], #16   \n\t"            \
2659
        "lsr  r4, %[b], #16   \n\t"            \
2660
        "mul  r4, r6      \n\t"            \
2661
        "add  %[h], %[h], r4    \n\t"            \
2662
        /* ah * bl */                                    \
2663
        "uxth r4, %[b]    \n\t"            \
2664
        "mul  r6, r4      \n\t"            \
2665
        "lsr  r4, r6, #16   \n\t"            \
2666
        "lsl  r6, r6, #16   \n\t"            \
2667
        "add  %[l], %[l], r6    \n\t"            \
2668
        "adc  %[h], r4    \n\t"            \
2669
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2670
        : [a] "l" (va), [b] "l" (vb)                     \
2671
        : "r4", "r5", "r6", "cc"                         \
2672
    )
2673
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
2674
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant used when neither WOLFSSL_SP_SMALL nor DEBUG is defined.
 * The operands are split into 16-bit halves; each of the four partial
 * products is computed once and then added into the three word accumulator
 * two times, with the carry propagated up to vo after every addition.
 * r5 is set to zero once and kept as the zero operand for carry propagation;
 * r6 and r7 hold the halves and partial products.
 *
 * vl, vh, vo: read and written (low registers).
 * va, vb:     read only (low registers).
 * Clobbers r5, r6, r7 and the condition flags.
 */
2675
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
2676
    __asm__ __volatile__ (                               \
2677
        /* al * bl */                                    \
2678
        "uxth r6, %[a]    \n\t"            \
2679
        "uxth r7, %[b]    \n\t"            \
2680
        "mul  r7, r6      \n\t"            \
2681
        "add  %[l], %[l], r7    \n\t"            \
2682
        "mov  r5, #0      \n\t"            \
2683
        "adc  %[h], r5    \n\t"            \
2684
        "adc  %[o], r5    \n\t"            \
2685
        "add  %[l], %[l], r7    \n\t"            \
2686
        "adc  %[h], r5    \n\t"            \
2687
        "adc  %[o], r5    \n\t"            \
2688
        /* al * bh */                                    \
2689
        "lsr  r7, %[b], #16   \n\t"            \
2690
        "mul  r6, r7      \n\t"            \
2691
        "lsr  r7, r6, #16   \n\t"            \
2692
        "lsl  r6, r6, #16   \n\t"            \
2693
        "add  %[l], %[l], r6    \n\t"            \
2694
        "adc  %[h], r7    \n\t"            \
2695
        "adc  %[o], r5    \n\t"            \
2696
        "add  %[l], %[l], r6    \n\t"            \
2697
        "adc  %[h], r7    \n\t"            \
2698
        "adc  %[o], r5    \n\t"            \
2699
        /* ah * bh */                                    \
2700
        "lsr  r6, %[a], #16   \n\t"            \
2701
        "lsr  r7, %[b], #16   \n\t"            \
2702
        "mul  r7, r6      \n\t"            \
2703
        "add  %[h], %[h], r7    \n\t"            \
2704
        "adc  %[o], r5    \n\t"            \
2705
        "add  %[h], %[h], r7    \n\t"            \
2706
        "adc  %[o], r5    \n\t"            \
2707
        /* ah * bl */                                    \
2708
        "uxth r7, %[b]    \n\t"            \
2709
        "mul  r6, r7      \n\t"            \
2710
        "lsr  r7, r6, #16   \n\t"            \
2711
        "lsl  r6, r6, #16   \n\t"            \
2712
        "add  %[l], %[l], r6    \n\t"            \
2713
        "adc  %[h], r7    \n\t"            \
2714
        "adc  %[o], r5    \n\t"            \
2715
        "add  %[l], %[l], r6    \n\t"            \
2716
        "adc  %[h], r7    \n\t"            \
2717
        "adc  %[o], r5    \n\t"            \
2718
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2719
        : [a] "l" (va), [b] "l" (vb)                     \
2720
        : "r5", "r6", "r7", "cc"                         \
2721
    )
2722
#else
2723
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant used when WOLFSSL_SP_SMALL or DEBUG is defined.
 * Computes the same result as the other variant without using r7. The value
 * of va is saved in r8 so that the register holding %[a] can be reused as
 * the zero operand for carry propagation; it is reloaded from r8 before the
 * high half of va is needed and again at the very end.
 * NOTE(review): %[a] is declared as an input-only operand yet is temporarily
 * overwritten here; correctness depends on the final restore from r8.
 *
 * vl, vh, vo: read and written (low registers).
 * va, vb:     read only from the caller's point of view (low registers).
 * Clobbers r5, r6, r8 and the condition flags.
 */
2724
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
2725
    __asm__ __volatile__ (                               \
2726
        "mov    r8, %[a]                \n\t"            \
2727
        /* al * bl */                                    \
2728
        "uxth   r6, %[a]                \n\t"            \
2729
        "uxth   r5, %[b]                \n\t"            \
2730
        "mul    r5, r6                  \n\t"            \
2731
        "add    %[l], %[l], r5          \n\t"            \
2732
        "mov    %[a], #0                \n\t"            \
2733
        "adc    %[h], %[a]              \n\t"            \
2734
        "adc    %[o], %[a]              \n\t"            \
2735
        "add    %[l], %[l], r5          \n\t"            \
2736
        "adc    %[h], %[a]              \n\t"            \
2737
        "adc    %[o], %[a]              \n\t"            \
2738
        /* al * bh */                                    \
2739
        "lsr    r5, %[b], #16           \n\t"            \
2740
        "mul    r6, r5                  \n\t"            \
2741
        "lsr    r5, r6, #16             \n\t"            \
2742
        "lsl    r6, r6, #16             \n\t"            \
2743
        "add    %[l], %[l], r6          \n\t"            \
2744
        "adc    %[h], r5                \n\t"            \
2745
        "adc    %[o], %[a]              \n\t"            \
2746
        "add    %[l], %[l], r6          \n\t"            \
2747
        "adc    %[h], r5                \n\t"            \
2748
        "adc    %[o], %[a]              \n\t"            \
2749
        /* ah * bh */                                    \
2750
        "mov    %[a], r8                \n\t"            \
2751
        "lsr    r6, %[a], #16           \n\t"            \
2752
        "lsr    r5, %[b], #16           \n\t"            \
2753
        "mul    r5, r6                  \n\t"            \
2754
        "add    %[h], %[h], r5          \n\t"            \
2755
        "mov    %[a], #0                \n\t"            \
2756
        "adc    %[o], %[a]              \n\t"            \
2757
        "add    %[h], %[h], r5          \n\t"            \
2758
        "adc    %[o], %[a]              \n\t"            \
2759
        /* ah * bl */                                    \
2760
        "uxth   r5, %[b]                \n\t"            \
2761
        "mul    r6, r5                  \n\t"            \
2762
        "lsr    r5, r6, #16             \n\t"            \
2763
        "lsl    r6, r6, #16             \n\t"            \
2764
        "add    %[l], %[l], r6          \n\t"            \
2765
        "adc    %[h], r5                \n\t"            \
2766
        "adc    %[o], %[a]              \n\t"            \
2767
        "add    %[l], %[l], r6          \n\t"            \
2768
        "adc    %[h], r5                \n\t"            \
2769
        "adc    %[o], %[a]              \n\t"            \
2770
        "mov    %[a], r8                \n\t"            \
2771
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2772
        : [a] "l" (va), [b] "l" (vb)                     \
2773
        : "r5", "r6", "r8", "cc"                         \
2774
    )
2775
#endif
2776
#ifndef DEBUG
2777
/* Multiply va by vb and add double size result twice into: vo | vh | vl
2778
 * Assumes first add will not overflow vh | vl
2779
 *
 * Variant used when DEBUG is not defined.
 * The operands are split into 16-bit halves; each partial product is
 * computed once and added into the accumulator two times.
 * Carry handling is reduced compared with a full accumulate: neither
 * addition of al * bl propagates a carry into vo, and for al * bh only the
 * second addition does. Both additions of ah * bh and of ah * bl propagate
 * their carry into vo.
 * r5 is set to zero once and kept as the zero operand; r6 and r7 hold the
 * halves and partial products.
 *
 * vl, vh, vo: read and written (low registers).
 * va, vb:     read only (low registers).
 * Clobbers r5, r6, r7 and the condition flags.
 */
2780
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
2781
    __asm__ __volatile__ (                               \
2782
        /* al * bl */                                    \
2783
        "uxth r6, %[a]    \n\t"            \
2784
        "uxth r7, %[b]    \n\t"            \
2785
        "mul  r7, r6      \n\t"            \
2786
        "add  %[l], %[l], r7    \n\t"            \
2787
        "mov  r5, #0      \n\t"            \
2788
        "adc  %[h], r5    \n\t"            \
2789
        "add  %[l], %[l], r7    \n\t"            \
2790
        "adc  %[h], r5    \n\t"            \
2791
        /* al * bh */                                    \
2792
        "lsr  r7, %[b], #16   \n\t"            \
2793
        "mul  r6, r7      \n\t"            \
2794
        "lsr  r7, r6, #16   \n\t"            \
2795
        "lsl  r6, r6, #16   \n\t"            \
2796
        "add  %[l], %[l], r6    \n\t"            \
2797
        "adc  %[h], r7    \n\t"            \
2798
        "add  %[l], %[l], r6    \n\t"            \
2799
        "adc  %[h], r7    \n\t"            \
2800
        "adc  %[o], r5    \n\t"            \
2801
        /* ah * bh */                                    \
2802
        "lsr  r6, %[a], #16   \n\t"            \
2803
        "lsr  r7, %[b], #16   \n\t"            \
2804
        "mul  r7, r6      \n\t"            \
2805
        "add  %[h], %[h], r7    \n\t"            \
2806
        "adc  %[o], r5    \n\t"            \
2807
        "add  %[h], %[h], r7    \n\t"            \
2808
        "adc  %[o], r5    \n\t"            \
2809
        /* ah * bl */                                    \
2810
        "uxth r7, %[b]    \n\t"            \
2811
        "mul  r6, r7      \n\t"            \
2812
        "lsr  r7, r6, #16   \n\t"            \
2813
        "lsl  r6, r6, #16   \n\t"            \
2814
        "add  %[l], %[l], r6    \n\t"            \
2815
        "adc  %[h], r7    \n\t"            \
2816
        "adc  %[o], r5    \n\t"            \
2817
        "add  %[l], %[l], r6    \n\t"            \
2818
        "adc  %[h], r7    \n\t"            \
2819
        "adc  %[o], r5    \n\t"            \
2820
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2821
        : [a] "l" (va), [b] "l" (vb)                     \
2822
        : "r5", "r6", "r7", "cc"                         \
2823
    )
2824
#else
2825
/* Multiply va by vb and add double size result twice into: vo | vh | vl
2826
 * Assumes first add will not overflow vh | vl
2827
 *
 * Variant used when DEBUG is defined.
 * Performs the same additions and carry propagation as the other variant
 * without using r7. The value of va is saved in r8 so that the register
 * holding %[a] can be reused as the zero operand; it is reloaded from r8
 * before the high half of va is needed and again at the very end.
 * NOTE(review): %[a] is declared as an input-only operand yet is temporarily
 * overwritten here; correctness depends on the final restore from r8.
 *
 * vl, vh, vo: read and written (low registers).
 * va, vb:     read only from the caller's point of view (low registers).
 * Clobbers r5, r6, r8 and the condition flags.
 */
2828
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
2829
    __asm__ __volatile__ (                               \
2830
        "mov  r8, %[a]    \n\t"            \
2831
        /* al * bl */                                    \
2832
        "uxth r5, %[a]    \n\t"            \
2833
        "uxth r6, %[b]    \n\t"            \
2834
        "mul  r6, r5      \n\t"            \
2835
        "add  %[l], %[l], r6    \n\t"            \
2836
        "mov  %[a], #0    \n\t"            \
2837
        "adc  %[h], %[a]    \n\t"            \
2838
        "add  %[l], %[l], r6    \n\t"            \
2839
        "adc  %[h], %[a]    \n\t"            \
2840
        /* al * bh */                                    \
2841
        "lsr  r6, %[b], #16   \n\t"            \
2842
        "mul  r5, r6      \n\t"            \
2843
        "lsr  r6, r5, #16   \n\t"            \
2844
        "lsl  r5, r5, #16   \n\t"            \
2845
        "add  %[l], %[l], r5    \n\t"            \
2846
        "adc  %[h], r6    \n\t"            \
2847
        "add  %[l], %[l], r5    \n\t"            \
2848
        "adc  %[h], r6    \n\t"            \
2849
        "adc  %[o], %[a]    \n\t"            \
2850
        /* ah * bh */                                    \
2851
        "mov    %[a], r8                \n\t"            \
2852
        "lsr  r5, %[a], #16   \n\t"            \
2853
        "lsr  r6, %[b], #16   \n\t"            \
2854
        "mul  r6, r5      \n\t"            \
2855
        "mov    %[a], #0                \n\t"            \
2856
        "add  %[h], %[h], r6    \n\t"            \
2857
        "adc  %[o], %[a]    \n\t"            \
2858
        "add  %[h], %[h], r6    \n\t"            \
2859
        "adc  %[o], %[a]    \n\t"            \
2860
        /* ah * bl */                                    \
2861
        "uxth r6, %[b]    \n\t"            \
2862
        "mul  r5, r6      \n\t"            \
2863
        "lsr  r6, r5, #16   \n\t"            \
2864
        "lsl  r5, r5, #16   \n\t"            \
2865
        "add  %[l], %[l], r5    \n\t"            \
2866
        "adc  %[h], r6    \n\t"            \
2867
        "adc  %[o], %[a]    \n\t"            \
2868
        "add  %[l], %[l], r5    \n\t"            \
2869
        "adc  %[h], r6    \n\t"            \
2870
        "adc  %[o], %[a]    \n\t"            \
2871
        "mov    %[a], r8                \n\t"            \
2872
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2873
        : [a] "l" (va), [b] "l" (vb)                     \
2874
        : "r5", "r6", "r8", "cc"                         \
2875
    )
2876
#endif
2877
/* Square va and store double size result in: vh | vl
 *
 * va is split into 16-bit halves al (low) and ah (high):
 *   vl = al * al
 *   vh = ah * ah
 *   vh:vl += (2 * al * ah) << 16
 * The doubled cross product is formed without an explicit doubling step: the
 * single product al * ah is shifted right by 15 for the part added to vh and
 * left by 17 for the part added to vl.
 * The previous contents of vl and vh are overwritten.
 *
 * vl, vh: written (declared read/write, low registers).
 * va:     read only (low register).
 * Clobbers r5, r6 and the condition flags.
 */
2878
#define SP_ASM_SQR(vl, vh, va)                           \
2879
    __asm__ __volatile__ (                               \
2880
        "lsr  r5, %[a], #16   \n\t"            \
2881
        "uxth r6, %[a]    \n\t"            \
2882
        "mov  %[l], r6    \n\t"            \
2883
        "mov  %[h], r5    \n\t"            \
2884
        /* al * al */                                    \
2885
        "mul  %[l], %[l]    \n\t"            \
2886
        /* ah * ah */                                    \
2887
        "mul  %[h], %[h]    \n\t"            \
2888
        /* 2 * al * ah */                                \
2889
        "mul  r6, r5      \n\t"            \
2890
        "lsr  r5, r6, #15   \n\t"            \
2891
        "lsl  r6, r6, #17   \n\t"            \
2892
        "add  %[l], %[l], r6    \n\t"            \
2893
        "adc  %[h], r5    \n\t"            \
2894
        : [h] "+l" (vh), [l] "+l" (vl)                   \
2895
        : [a] "l" (va)                                   \
2896
        : "r5", "r6", "cc"                               \
2897
    )
2898
/* Square va and add double size result into: vo | vh | vl
 *
 * va is split into 16-bit halves al (low) and ah (high).
 * First al * al is added to vl and ah * ah (plus carry) to vh, with the
 * carry propagated into vo. The halves are then extracted again and the
 * doubled cross product is added: the single product al * ah is shifted
 * right by 15 for the part added to vh and left by 17 for the part added to
 * vl, again with the carry propagated into vo.
 * r5 is set to zero and used as the zero operand for carry propagation.
 *
 * vl, vh, vo: read and written (low registers).
 * va:         read only (low register).
 * Clobbers r4, r5, r6 and the condition flags.
 */
2899
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
2900
    __asm__ __volatile__ (                               \
2901
        "lsr  r4, %[a], #16   \n\t"            \
2902
        "uxth r6, %[a]    \n\t"            \
2903
        /* al * al */                                    \
2904
        "mul  r6, r6      \n\t"            \
2905
        /* ah * ah */                                    \
2906
        "mul  r4, r4      \n\t"            \
2907
        "add  %[l], %[l], r6    \n\t"            \
2908
        "adc  %[h], r4    \n\t"            \
2909
        "mov  r5, #0      \n\t"            \
2910
        "adc  %[o], r5    \n\t"            \
2911
        "lsr  r4, %[a], #16   \n\t"            \
2912
        "uxth r6, %[a]    \n\t"            \
2913
        /* 2 * al * ah */                                \
2914
        "mul  r6, r4      \n\t"            \
2915
        "lsr  r4, r6, #15   \n\t"            \
2916
        "lsl  r6, r6, #17   \n\t"            \
2917
        "add  %[l], %[l], r6    \n\t"            \
2918
        "adc  %[h], r4    \n\t"            \
2919
        "adc  %[o], r5    \n\t"            \
2920
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2921
        : [a] "l" (va)                                   \
2922
        : "r4", "r5", "r6", "cc"                         \
2923
    )
2924
/* Square va and add double size result into: vh | vl
 *
 * va is split into 16-bit halves al (low) and ah (high).
 * First al * al is added to vl and ah * ah (plus carry) to vh. The halves
 * are then extracted again and the doubled cross product is added: the
 * single product al * ah is shifted right by 15 for the part added to vh and
 * left by 17 for the part added to vl.
 * There is no third accumulator word: any carry out of vh is dropped, so the
 * caller must know the sum fits in two words.
 *
 * vl, vh: read and written (low registers).
 * va:     read only (low register).
 * Clobbers r5, r6 and the condition flags.
 */
2925
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
2926
    __asm__ __volatile__ (                               \
2927
        "lsr  r5, %[a], #16   \n\t"            \
2928
        "uxth r6, %[a]    \n\t"            \
2929
        /* al * al */                                    \
2930
        "mul  r6, r6      \n\t"            \
2931
        /* ah * ah */                                    \
2932
        "mul  r5, r5      \n\t"            \
2933
        "add  %[l], %[l], r6    \n\t"            \
2934
        "adc  %[h], r5    \n\t"            \
2935
        "lsr  r5, %[a], #16   \n\t"            \
2936
        "uxth r6, %[a]    \n\t"            \
2937
        /* 2 * al * ah */                                \
2938
        "mul  r6, r5      \n\t"            \
2939
        "lsr  r5, r6, #15   \n\t"            \
2940
        "lsl  r6, r6, #17   \n\t"            \
2941
        "add  %[l], %[l], r6    \n\t"            \
2942
        "adc  %[h], r5    \n\t"            \
2943
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2944
        : [a] "l" (va)                                   \
2945
        : "r5", "r6", "cc"                               \
2946
    )
2947
/* Add va into: vh | vl
 *
 * Adds the single word va to the low word vl and then adds the resulting
 * carry into the high word vh.
 * r5 is loaded with zero between the two additions so that the second
 * instruction adds only the carry; the sequence relies on that load leaving
 * the carry flag from the first addition intact.
 *
 * vl, vh: read and written (low registers).
 * va:     read only (low register).
 * Clobbers r5 and the condition flags.
 */
2948
#define SP_ASM_ADDC(vl, vh, va)                          \
2949
    __asm__ __volatile__ (                               \
2950
        "add  %[l], %[l], %[a]  \n\t"            \
2951
        "mov  r5, #0      \n\t"            \
2952
        "adc  %[h], r5    \n\t"            \
2953
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2954
        : [a] "l" (va)                                   \
2955
        : "r5", "cc"                                     \
2956
    )
2957
/* Sub va from: vh | vl
 *
 * Subtracts the single word va from the low word vl and then applies the
 * resulting borrow to the high word vh by subtracting zero with carry.
 * r5 is loaded with zero between the two subtractions; the sequence relies on
 * that load leaving the carry flag from the first subtraction intact.
 *
 * vl, vh: read and written (low registers).
 * va:     read only (low register).
 * Clobbers r5 and the condition flags.
 */
2958
#define SP_ASM_SUBB(vl, vh, va)                          \
2959
    __asm__ __volatile__ (                               \
2960
        "sub  %[l], %[l], %[a]  \n\t"            \
2961
        "mov  r5, #0      \n\t"            \
2962
        "sbc  %[h], r5    \n\t"            \
2963
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2964
        : [a] "l" (va)                                   \
2965
        : "r5", "cc"                                     \
2966
    )
2967
/* Add two times vc | vb | va into vo | vh | vl
 *
 * Performs the same three word add-with-carry chain twice:
 * vl += va, vh += vb + carry, vo += vc + carry.
 * Any carry out of vo is discarded.
 *
 * vl, vh, vo: read and written (low registers).
 * va, vb, vc: read only (low registers).
 * Clobbers the condition flags only; no scratch registers are used.
 */
2968
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
2969
    __asm__ __volatile__ (                               \
2970
        "add  %[l], %[l], %[a]  \n\t"            \
2971
        "adc  %[h], %[b]    \n\t"            \
2972
        "adc  %[o], %[c]    \n\t"            \
2973
        "add  %[l], %[l], %[a]  \n\t"            \
2974
        "adc  %[h], %[b]    \n\t"            \
2975
        "adc  %[o], %[c]    \n\t"            \
2976
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2977
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)       \
2978
        : "cc"                                           \
2979
    )
2980
2981
#endif
2982
2983
#ifdef WOLFSSL_SP_DIV_WORD_HALF
2984
/* Divide a two digit number by a digit number and return. (hi | lo) / d
2985
 *
2986
 * No division instruction used - does operation bit by bit.
2987
 * Constant time.
2988
 *
2989
 * @param [in] hi  SP integer digit. High digit of the dividend.
2990
 * @param [in] lo  SP integer digit. Low digit of the dividend.
2991
 * @param [in] d   SP integer digit. Number to divide by.
2992
 * @return  The division result.
2993
 */
2994
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
2995
                                          sp_int_digit d)
2996
{
2997
    __asm__ __volatile__ (
2998
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2999
        "lsrs r3, %[d], #24\n\t"
3000
#else
3001
        "lsr  r3, %[d], #24\n\t"
3002
#endif
3003
        "beq  2%=f\n\t"
3004
  "\n1%=:\n\t"
3005
        "movs r3, #0\n\t"
3006
        "b  3%=f\n\t"
3007
  "\n2%=:\n\t"
3008
        "mov  r3, #8\n\t"
3009
  "\n3%=:\n\t"
3010
        "movs r4, #31\n\t"
3011
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3012
        "subs r4, r4, r3\n\t"
3013
#else
3014
        "sub  r4, r4, r3\n\t"
3015
#endif
3016
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3017
        "lsls %[d], %[d], r3\n\t"
3018
#else
3019
        "lsl  %[d], %[d], r3\n\t"
3020
#endif
3021
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3022
        "lsls %[hi], %[hi], r3\n\t"
3023
#else
3024
        "lsl  %[hi], %[hi], r3\n\t"
3025
#endif
3026
        "mov  r5, %[lo]\n\t"
3027
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3028
        "lsrs r5, r5, r4\n\t"
3029
#else
3030
        "lsr  r5, r5, r4\n\t"
3031
#endif
3032
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3033
        "lsls %[lo], %[lo], r3\n\t"
3034
#else
3035
        "lsl  %[lo], %[lo], r3\n\t"
3036
#endif
3037
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3038
        "lsrs r5, r5, #1\n\t"
3039
#else
3040
        "lsr  r5, r5, #1\n\t"
3041
#endif
3042
#if defined(WOLFSSL_KEIL)
3043
        "orrs %[hi], %[hi], r5\n\t"
3044
#elif defined(__clang__)
3045
        "orrs %[hi], r5\n\t"
3046
#else
3047
        "orr  %[hi], r5\n\t"
3048
#endif
3049
3050
        "movs   r3, #0\n\t"
3051
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3052
        "lsrs   r5, %[d], #1\n\t"
3053
#else
3054
        "lsr    r5, %[d], #1\n\t"
3055
#endif
3056
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3057
        "adds   r5, r5, #1\n\t"
3058
#else
3059
        "add    r5, r5, #1\n\t"
3060
#endif
3061
        "mov    r8, %[lo]\n\t"
3062
        "mov    r9, %[hi]\n\t"
3063
        /* Do top 32 */
3064
        "movs   r6, r5\n\t"
3065
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3066
        "subs   r6, r6, %[hi]\n\t"
3067
#else
3068
        "sub    r6, r6, %[hi]\n\t"
3069
#endif
3070
#ifdef WOLFSSL_KEIL
3071
        "sbcs   r6, r6, r6\n\t"
3072
#elif defined(__clang__)
3073
        "sbcs   r6, r6\n\t"
3074
#else
3075
        "sbc    r6, r6\n\t"
3076
#endif
3077
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3078
        "adds   r3, r3, r3\n\t"
3079
#else
3080
        "add    r3, r3, r3\n\t"
3081
#endif
3082
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3083
        "subs   r3, r3, r6\n\t"
3084
#else
3085
        "sub    r3, r3, r6\n\t"
3086
#endif
3087
#ifdef WOLFSSL_KEIL
3088
        "ands   r6, r6, r5\n\t"
3089
#elif defined(__clang__)
3090
        "ands   r6, r5\n\t"
3091
#else
3092
        "and    r6, r5\n\t"
3093
#endif
3094
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3095
        "subs   %[hi], %[hi], r6\n\t"
3096
#else
3097
        "sub    %[hi], %[hi], r6\n\t"
3098
#endif
3099
        "movs   r4, #29\n\t"
3100
        "\n"
3101
    "L_sp_div_word_loop%=:\n\t"
3102
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3103
        "lsls   %[lo], %[lo], #1\n\t"
3104
#else
3105
        "lsl    %[lo], %[lo], #1\n\t"
3106
#endif
3107
#ifdef WOLFSSL_KEIL
3108
        "adcs   %[hi], %[hi], %[hi]\n\t"
3109
#elif defined(__clang__)
3110
        "adcs   %[hi], %[hi]\n\t"
3111
#else
3112
        "adc    %[hi], %[hi]\n\t"
3113
#endif
3114
        "movs   r6, r5\n\t"
3115
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3116
        "subs   r6, r6, %[hi]\n\t"
3117
#else
3118
        "sub    r6, r6, %[hi]\n\t"
3119
#endif
3120
#ifdef WOLFSSL_KEIL
3121
        "sbcs   r6, r6, r6\n\t"
3122
#elif defined(__clang__)
3123
        "sbcs   r6, r6\n\t"
3124
#else
3125
        "sbc    r6, r6\n\t"
3126
#endif
3127
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3128
        "adds   r3, r3, r3\n\t"
3129
#else
3130
        "add    r3, r3, r3\n\t"
3131
#endif
3132
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3133
        "subs   r3, r3, r6\n\t"
3134
#else
3135
        "sub    r3, r3, r6\n\t"
3136
#endif
3137
#ifdef WOLFSSL_KEIL
3138
        "ands   r6, r6, r5\n\t"
3139
#elif defined(__clang__)
3140
        "ands   r6, r5\n\t"
3141
#else
3142
        "and    r6, r5\n\t"
3143
#endif
3144
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3145
        "subs   %[hi], %[hi], r6\n\t"
3146
#else
3147
        "sub    %[hi], %[hi], r6\n\t"
3148
#endif
3149
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3150
        "subs   r4, r4, #1\n\t"
3151
#else
3152
        "sub    r4, r4, #1\n\t"
3153
#endif
3154
        "bpl    L_sp_div_word_loop%=\n\t"
3155
        "movs   r7, #0\n\t"
3156
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3157
        "adds   r3, r3, r3\n\t"
3158
#else
3159
        "add    r3, r3, r3\n\t"
3160
#endif
3161
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3162
        "adds   r3, r3, #1\n\t"
3163
#else
3164
        "add    r3, r3, #1\n\t"
3165
#endif
3166
        /* r * d - Start */
3167
        "uxth   %[hi], r3\n\t"
3168
        "uxth   r4, %[d]\n\t"
3169
#ifdef WOLFSSL_KEIL
3170
        "muls   r4, %[hi], r4\n\t"
3171
#elif defined(__clang__)
3172
        "muls   r4, %[hi]\n\t"
3173
#else
3174
        "mul    r4, %[hi]\n\t"
3175
#endif
3176
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3177
        "lsrs   r6, %[d], #16\n\t"
3178
#else
3179
        "lsr    r6, %[d], #16\n\t"
3180
#endif
3181
#ifdef WOLFSSL_KEIL
3182
        "muls   %[hi], r6, %[hi]\n\t"
3183
#elif defined(__clang__)
3184
        "muls   %[hi], r6\n\t"
3185
#else
3186
        "mul    %[hi], r6\n\t"
3187
#endif
3188
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3189
        "lsrs   r5, %[hi], #16\n\t"
3190
#else
3191
        "lsr    r5, %[hi], #16\n\t"
3192
#endif
3193
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3194
        "lsls   %[hi], %[hi], #16\n\t"
3195
#else
3196
        "lsl    %[hi], %[hi], #16\n\t"
3197
#endif
3198
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3199
        "adds   r4, r4, %[hi]\n\t"
3200
#else
3201
        "add    r4, r4, %[hi]\n\t"
3202
#endif
3203
#ifdef WOLFSSL_KEIL
3204
        "adcs   r5, r5, r7\n\t"
3205
#elif defined(__clang__)
3206
        "adcs   r5, r7\n\t"
3207
#else
3208
        "adc    r5, r7\n\t"
3209
#endif
3210
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3211
        "lsrs   %[hi], r3, #16\n\t"
3212
#else
3213
        "lsr    %[hi], r3, #16\n\t"
3214
#endif
3215
#ifdef WOLFSSL_KEIL
3216
        "muls   r6, %[hi], r6\n\t"
3217
#elif defined(__clang__)
3218
        "muls   r6, %[hi]\n\t"
3219
#else
3220
        "mul    r6, %[hi]\n\t"
3221
#endif
3222
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3223
        "adds   r5, r5, r6\n\t"
3224
#else
3225
        "add    r5, r5, r6\n\t"
3226
#endif
3227
        "uxth   r6, %[d]\n\t"
3228
#ifdef WOLFSSL_KEIL
3229
        "muls   %[hi], r6, %[hi]\n\t"
3230
#elif defined(__clang__)
3231
        "muls   %[hi], r6\n\t"
3232
#else
3233
        "mul    %[hi], r6\n\t"
3234
#endif
3235
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3236
        "lsrs   r6, %[hi], #16\n\t"
3237
#else
3238
        "lsr    r6, %[hi], #16\n\t"
3239
#endif
3240
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3241
        "lsls   %[hi], %[hi], #16\n\t"
3242
#else
3243
        "lsl    %[hi], %[hi], #16\n\t"
3244
#endif
3245
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3246
        "adds   r4, r4, %[hi]\n\t"
3247
#else
3248
        "add    r4, r4, %[hi]\n\t"
3249
#endif
3250
#ifdef WOLFSSL_KEIL
3251
        "adcs   r5, r5, r6\n\t"
3252
#elif defined(__clang__)
3253
        "adcs   r5, r6\n\t"
3254
#else
3255
        "adc    r5, r6\n\t"
3256
#endif
3257
        /* r * d - Done */
3258
        "mov    %[hi], r8\n\t"
3259
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3260
        "subs   %[hi], %[hi], r4\n\t"
3261
#else
3262
        "sub    %[hi], %[hi], r4\n\t"
3263
#endif
3264
        "movs   r4, %[hi]\n\t"
3265
        "mov    %[hi], r9\n\t"
3266
#ifdef WOLFSSL_KEIL
3267
        "sbcs   %[hi], %[hi], r5\n\t"
3268
#elif defined(__clang__)
3269
        "sbcs   %[hi], r5\n\t"
3270
#else
3271
        "sbc    %[hi], r5\n\t"
3272
#endif
3273
        "movs   r5, %[hi]\n\t"
3274
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3275
        "adds   r3, r3, r5\n\t"
3276
#else
3277
        "add    r3, r3, r5\n\t"
3278
#endif
3279
        /* r * d - Start */
3280
        "uxth   %[hi], r3\n\t"
3281
        "uxth   r4, %[d]\n\t"
3282
#ifdef WOLFSSL_KEIL
3283
        "muls   r4, %[hi], r4\n\t"
3284
#elif defined(__clang__)
3285
        "muls   r4, %[hi]\n\t"
3286
#else
3287
        "mul    r4, %[hi]\n\t"
3288
#endif
3289
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3290
        "lsrs   r6, %[d], #16\n\t"
3291
#else
3292
        "lsr    r6, %[d], #16\n\t"
3293
#endif
3294
#ifdef WOLFSSL_KEIL
3295
        "muls   %[hi], r6, %[hi]\n\t"
3296
#elif defined(__clang__)
3297
        "muls   %[hi], r6\n\t"
3298
#else
3299
        "mul    %[hi], r6\n\t"
3300
#endif
3301
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3302
        "lsrs   r5, %[hi], #16\n\t"
3303
#else
3304
        "lsr    r5, %[hi], #16\n\t"
3305
#endif
3306
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3307
        "lsls   %[hi], %[hi], #16\n\t"
3308
#else
3309
        "lsl    %[hi], %[hi], #16\n\t"
3310
#endif
3311
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3312
        "adds   r4, r4, %[hi]\n\t"
3313
#else
3314
        "add    r4, r4, %[hi]\n\t"
3315
#endif
3316
#ifdef WOLFSSL_KEIL
3317
        "adcs   r5, r5, r7\n\t"
3318
#elif defined(__clang__)
3319
        "adcs   r5, r7\n\t"
3320
#else
3321
        "adc    r5, r7\n\t"
3322
#endif
3323
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3324
        "lsrs   %[hi], r3, #16\n\t"
3325
#else
3326
        "lsr    %[hi], r3, #16\n\t"
3327
#endif
3328
#ifdef WOLFSSL_KEIL
3329
        "muls   r6, %[hi], r6\n\t"
3330
#elif defined(__clang__)
3331
        "muls   r6, %[hi]\n\t"
3332
#else
3333
        "mul    r6, %[hi]\n\t"
3334
#endif
3335
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3336
        "adds   r5, r5, r6\n\t"
3337
#else
3338
        "add    r5, r5, r6\n\t"
3339
#endif
3340
        "uxth   r6, %[d]\n\t"
3341
#ifdef WOLFSSL_KEIL
3342
        "muls   %[hi], r6, %[hi]\n\t"
3343
#elif defined(__clang__)
3344
        "muls   %[hi], r6\n\t"
3345
#else
3346
        "mul    %[hi], r6\n\t"
3347
#endif
3348
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3349
        "lsrs   r6, %[hi], #16\n\t"
3350
#else
3351
        "lsr    r6, %[hi], #16\n\t"
3352
#endif
3353
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3354
        "lsls   %[hi], %[hi], #16\n\t"
3355
#else
3356
        "lsl    %[hi], %[hi], #16\n\t"
3357
#endif
3358
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3359
        "adds   r4, r4, %[hi]\n\t"
3360
#else
3361
        "add    r4, r4, %[hi]\n\t"
3362
#endif
3363
#ifdef WOLFSSL_KEIL
3364
        "adcs   r5, r5, r6\n\t"
3365
#elif defined(__clang__)
3366
        "adcs   r5, r6\n\t"
3367
#else
3368
        "adc    r5, r6\n\t"
3369
#endif
3370
        /* r * d - Done */
3371
        "mov    %[hi], r8\n\t"
3372
        "mov    r6, r9\n\t"
3373
#ifdef WOLFSSL_KEIL
3374
        "subs   r4, %[hi], r4\n\t"
3375
#else
3376
#ifdef __clang__
3377
        "subs   r4, %[hi], r4\n\t"
3378
#else
3379
        "sub    r4, %[hi], r4\n\t"
3380
#endif
3381
#endif
3382
#ifdef WOLFSSL_KEIL
3383
        "sbcs   r6, r6, r5\n\t"
3384
#elif defined(__clang__)
3385
        "sbcs   r6, r5\n\t"
3386
#else
3387
        "sbc    r6, r5\n\t"
3388
#endif
3389
        "movs   r5, r6\n\t"
3390
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3391
        "adds   r3, r3, r5\n\t"
3392
#else
3393
        "add    r3, r3, r5\n\t"
3394
#endif
3395
        /* r * d - Start */
3396
        "uxth   %[hi], r3\n\t"
3397
        "uxth   r4, %[d]\n\t"
3398
#ifdef WOLFSSL_KEIL
3399
        "muls   r4, %[hi], r4\n\t"
3400
#elif defined(__clang__)
3401
        "muls   r4, %[hi]\n\t"
3402
#else
3403
        "mul    r4, %[hi]\n\t"
3404
#endif
3405
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3406
        "lsrs   r6, %[d], #16\n\t"
3407
#else
3408
        "lsr    r6, %[d], #16\n\t"
3409
#endif
3410
#ifdef WOLFSSL_KEIL
3411
        "muls   %[hi], r6, %[hi]\n\t"
3412
#elif defined(__clang__)
3413
        "muls   %[hi], r6\n\t"
3414
#else
3415
        "mul    %[hi], r6\n\t"
3416
#endif
3417
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3418
        "lsrs   r5, %[hi], #16\n\t"
3419
#else
3420
        "lsr    r5, %[hi], #16\n\t"
3421
#endif
3422
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3423
        "lsls   %[hi], %[hi], #16\n\t"
3424
#else
3425
        "lsl    %[hi], %[hi], #16\n\t"
3426
#endif
3427
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3428
        "adds   r4, r4, %[hi]\n\t"
3429
#else
3430
        "add    r4, r4, %[hi]\n\t"
3431
#endif
3432
#ifdef WOLFSSL_KEIL
3433
        "adcs   r5, r5, r7\n\t"
3434
#elif defined(__clang__)
3435
        "adcs   r5, r7\n\t"
3436
#else
3437
        "adc    r5, r7\n\t"
3438
#endif
3439
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3440
        "lsrs   %[hi], r3, #16\n\t"
3441
#else
3442
        "lsr    %[hi], r3, #16\n\t"
3443
#endif
3444
#ifdef WOLFSSL_KEIL
3445
        "muls   r6, %[hi], r6\n\t"
3446
#elif defined(__clang__)
3447
        "muls   r6, %[hi]\n\t"
3448
#else
3449
        "mul    r6, %[hi]\n\t"
3450
#endif
3451
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3452
        "adds   r5, r5, r6\n\t"
3453
#else
3454
        "add    r5, r5, r6\n\t"
3455
#endif
3456
        "uxth   r6, %[d]\n\t"
3457
#ifdef WOLFSSL_KEIL
3458
        "muls   %[hi], r6, %[hi]\n\t"
3459
#elif defined(__clang__)
3460
        "muls   %[hi], r6\n\t"
3461
#else
3462
        "mul    %[hi], r6\n\t"
3463
#endif
3464
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3465
        "lsrs   r6, %[hi], #16\n\t"
3466
#else
3467
        "lsr    r6, %[hi], #16\n\t"
3468
#endif
3469
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3470
        "lsls   %[hi], %[hi], #16\n\t"
3471
#else
3472
        "lsl    %[hi], %[hi], #16\n\t"
3473
#endif
3474
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3475
        "adds   r4, r4, %[hi]\n\t"
3476
#else
3477
        "add    r4, r4, %[hi]\n\t"
3478
#endif
3479
#ifdef WOLFSSL_KEIL
3480
        "adcs   r5, r5, r6\n\t"
3481
#elif defined(__clang__)
3482
        "adcs   r5, r6\n\t"
3483
#else
3484
        "adc    r5, r6\n\t"
3485
#endif
3486
        /* r * d - Done */
3487
        "mov    %[hi], r8\n\t"
3488
        "mov    r6, r9\n\t"
3489
#ifdef WOLFSSL_KEIL
3490
        "subs   r4, %[hi], r4\n\t"
3491
#else
3492
#ifdef __clang__
3493
        "subs   r4, %[hi], r4\n\t"
3494
#else
3495
        "sub    r4, %[hi], r4\n\t"
3496
#endif
3497
#endif
3498
#ifdef WOLFSSL_KEIL
3499
        "sbcs   r6, r6, r5\n\t"
3500
#elif defined(__clang__)
3501
        "sbcs   r6, r5\n\t"
3502
#else
3503
        "sbc    r6, r5\n\t"
3504
#endif
3505
        "movs   r5, r6\n\t"
3506
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3507
        "adds   r3, r3, r5\n\t"
3508
#else
3509
        "add    r3, r3, r5\n\t"
3510
#endif
3511
        "movs   r6, %[d]\n\t"
3512
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3513
        "subs   r6, r6, r4\n\t"
3514
#else
3515
        "sub    r6, r6, r4\n\t"
3516
#endif
3517
#ifdef WOLFSSL_KEIL
3518
        "sbcs   r6, r6, r6\n\t"
3519
#elif defined(__clang__)
3520
        "sbcs   r6, r6\n\t"
3521
#else
3522
        "sbc    r6, r6\n\t"
3523
#endif
3524
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3525
        "subs   r3, r3, r6\n\t"
3526
#else
3527
        "sub    r3, r3, r6\n\t"
3528
#endif
3529
        "movs   %[hi], r3\n\t"
3530
        : [hi] "+l" (hi), [lo] "+l" (lo), [d] "+l" (d)
3531
        :
3532
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
3533
    );
3534
    return (sp_uint32)(size_t)hi;
3535
}
3536
3537
/* NOTE(review): presumably tells later code that an assembly word-division
 * routine has been provided for this CPU - confirm against users of the
 * macro. */
#define SP_ASM_DIV_WORD
3538
#endif /* !WOLFSSL_SP_DIV_WORD_HALF */
3539
3540
/* NOTE(review): presumably records that the assembly helpers for this CPU
 * section have been defined - confirm against users of the macro. */
#define SP_INT_ASM_AVAILABLE
3541
3542
    #endif /* WOLFSSL_SP_ARM_THUMB && SP_WORD_SIZE == 32 */
3543
3544
    #if defined(WOLFSSL_SP_PPC64) && SP_WORD_SIZE == 64
3545
/*
3546
 * CPU: PPC64
3547
 */
3548
3549
    #ifdef __APPLE__
3550
3551
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mulld writes the low 64 bits of the product to vl and mulhdu writes the
 * high 64 bits of the unsigned product to vh.
 * vl and vh are declared read-write ("+r"); va and vb are input registers.
 * No clobbers are declared.
 */
3552
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3553
    __asm__ __volatile__ (                               \
3554
        "mulld  %[l], %[a], %[b]  \n\t"            \
3555
        "mulhdu %[h], %[a], %[b]  \n\t"            \
3556
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3557
        : [a] "r" (va), [b] "r" (vb)                     \
3558
        :                                                \
3559
    )
3560
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The high word is computed first (mulhdu into vh), then the low word
 * (mulld into vl), and finally vo is loaded with zero (li).
 * vl and vh are declared read-write ("+r"); vo is write-only ("=r").
 * No clobbers are declared.
 */
3561
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3562
    __asm__ __volatile__ (                               \
3563
        "mulhdu %[h], %[a], %[b]  \n\t"            \
3564
        "mulld  %[l], %[a], %[b]  \n\t"            \
3565
        "li %[o], 0     \n\t"            \
3566
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3567
        : [a] "r" (va), [b] "r" (vb)                     \
3568
        :                                                \
3569
    )
3570
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is computed into scratch registers r16 (low word) and r17
 * (high word), both listed as clobbers. addc adds the low word into vl,
 * adde adds the high word plus carry into vh, and addze adds the final
 * carry into vo.
 */
3571
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3572
    __asm__ __volatile__ (                               \
3573
        "mulld  r16, %[a], %[b]   \n\t"            \
3574
        "mulhdu r17, %[a], %[b]   \n\t"            \
3575
        "addc %[l], %[l], r16   \n\t"            \
3576
        "adde %[h], %[h], r17   \n\t"            \
3577
        "addze  %[o], %[o]    \n\t"            \
3578
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3579
        : [a] "r" (va), [b] "r" (vb)                     \
3580
        : "r16", "r17", "cc"                             \
3581
    )
3582
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is computed into scratch registers r16 (low word) and r17
 * (high word), both listed as clobbers, and added into vl/vh with
 * addc/adde. There is no third word: the carry out of vh is not added
 * anywhere.
 */
3583
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3584
    __asm__ __volatile__ (                               \
3585
        "mulld  r16, %[a], %[b]   \n\t"            \
3586
        "mulhdu r17, %[a], %[b]   \n\t"            \
3587
        "addc %[l], %[l], r16   \n\t"            \
3588
        "adde %[h], %[h], r17   \n\t"            \
3589
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3590
        : [a] "r" (va), [b] "r" (vb)                     \
3591
        : "r16", "r17", "cc"                             \
3592
    )
3593
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into scratch registers r16 (low word) and
 * r17 (high word), both listed as clobbers. The addc/adde/addze sequence is
 * then performed twice, so the carry of each addition is added into vo.
 */
3594
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3595
    __asm__ __volatile__ (                               \
3596
        "mulld  r16, %[a], %[b]   \n\t"            \
3597
        "mulhdu r17, %[a], %[b]   \n\t"            \
3598
        "addc %[l], %[l], r16   \n\t"            \
3599
        "adde %[h], %[h], r17   \n\t"            \
3600
        "addze  %[o], %[o]    \n\t"            \
3601
        "addc %[l], %[l], r16   \n\t"            \
3602
        "adde %[h], %[h], r17   \n\t"            \
3603
        "addze  %[o], %[o]    \n\t"            \
3604
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3605
        : [a] "r" (va), [b] "r" (vb)                     \
3606
        : "r16", "r17", "cc"                             \
3607
    )
3608
/* Multiply va by vb and add double size result twice into: vo | vh | vl
3609
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once into scratch registers r16 (low word) and
 * r17 (high word), both listed as clobbers. No addze follows the first
 * addc/adde pair; only the carry out of the second addition is added
 * into vo.
3610
 */
3611
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
3612
    __asm__ __volatile__ (                               \
3613
        "mulld  r16, %[a], %[b]   \n\t"            \
3614
        "mulhdu r17, %[a], %[b]   \n\t"            \
3615
        "addc %[l], %[l], r16   \n\t"            \
3616
        "adde %[h], %[h], r17   \n\t"            \
3617
        "addc %[l], %[l], r16   \n\t"            \
3618
        "adde %[h], %[h], r17   \n\t"            \
3619
        "addze  %[o], %[o]    \n\t"            \
3620
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3621
        : [a] "r" (va), [b] "r" (vb)                     \
3622
        : "r16", "r17", "cc"                             \
3623
    )
3624
/* Square va and store double size result in: vh | vl
 *
 * va is used as both multiplicands: mulld writes the low 64 bits of the
 * square to vl and mulhdu writes the high 64 bits to vh.
 * vl and vh are declared read-write ("+r"); no clobbers are declared.
 */
3625
#define SP_ASM_SQR(vl, vh, va)                           \
3626
    __asm__ __volatile__ (                               \
3627
        "mulld  %[l], %[a], %[a]  \n\t"            \
3628
        "mulhdu %[h], %[a], %[a]  \n\t"            \
3629
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3630
        : [a] "r" (va)                                   \
3631
        :                                                \
3632
    )
3633
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is computed into scratch registers r16 (low word) and r17
 * (high word), both listed as clobbers. addc/adde add it into vl/vh and
 * addze adds the final carry into vo.
 */
3634
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
3635
    __asm__ __volatile__ (                               \
3636
        "mulld  r16, %[a], %[a]   \n\t"            \
3637
        "mulhdu r17, %[a], %[a]   \n\t"            \
3638
        "addc %[l], %[l], r16   \n\t"            \
3639
        "adde %[h], %[h], r17   \n\t"            \
3640
        "addze  %[o], %[o]    \n\t"            \
3641
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3642
        : [a] "r" (va)                                   \
3643
        : "r16", "r17", "cc"                             \
3644
    )
3645
/* Square va and add double size result into: vh | vl
 *
 * The square is computed into scratch registers r16 (low word) and r17
 * (high word), both listed as clobbers, and added into vl/vh with
 * addc/adde. The carry out of vh is not added anywhere.
 */
3646
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
3647
    __asm__ __volatile__ (                               \
3648
        "mulld  r16, %[a], %[a]   \n\t"            \
3649
        "mulhdu r17, %[a], %[a]   \n\t"            \
3650
        "addc %[l], %[l], r16   \n\t"            \
3651
        "adde %[h], %[h], r17   \n\t"            \
3652
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3653
        : [a] "r" (va)                                   \
3654
        : "r16", "r17", "cc"                             \
3655
    )
3656
/* Add va into: vh | vl
 *
 * addc adds va to vl and records the carry; addze then adds that carry
 * to vh. vl and vh are read-write ("+r") and va is an input register.
 */
3657
#define SP_ASM_ADDC(vl, vh, va)                          \
3658
    __asm__ __volatile__ (                               \
3659
        "addc %[l], %[l], %[a]  \n\t"            \
3660
        "addze  %[h], %[h]    \n\t"            \
3661
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3662
        : [a] "r" (va)                                   \
3663
        : "cc"                                           \
3664
    )
3665
/* Sub va from: vh | vl
 *
 * subfc subtracts va from vl and records the carry. Scratch register r16
 * (listed as a clobber) is loaded with zero so that the following subfe
 * subtracts only the borrow from vh.
 */
3666
#define SP_ASM_SUBB(vl, vh, va)                          \
3667
    __asm__ __volatile__ (                               \
3668
        "subfc  %[l], %[a], %[l]  \n\t"            \
3669
        "li    r16, 0     \n\t"            \
3670
        "subfe %[h], r16, %[h]    \n\t"            \
3671
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3672
        : [a] "r" (va)                                   \
3673
        : "r16", "cc"                                    \
3674
    )
3675
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word addition (addc for the low word, adde for the middle and
 * top words) is performed twice in a row. vl, vh and vo are read-write
 * ("+r"); va, vb and vc are input registers. No scratch registers are used.
 */
3676
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
3677
    __asm__ __volatile__ (                               \
3678
        "addc %[l], %[l], %[a]  \n\t"            \
3679
        "adde %[h], %[h], %[b]  \n\t"            \
3680
        "adde %[o], %[o], %[c]  \n\t"            \
3681
        "addc %[l], %[l], %[a]  \n\t"            \
3682
        "adde %[h], %[h], %[b]  \n\t"            \
3683
        "adde %[o], %[o], %[c]  \n\t"            \
3684
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3685
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
3686
        : "cc"                                           \
3687
    )
3688
/* Count leading zeros.
 *
 * cntlzd writes the number of leading zero bits in the 64-bit value va
 * to vn. vn is write-only ("=r"); va is an input register.
 */
3689
#define SP_ASM_LZCNT(va, vn)                             \
3690
    __asm__ __volatile__ (                               \
3691
        "cntlzd %[n], %[a]  \n\t"                    \
3692
        : [n] "=r" (vn)                                  \
3693
        : [a] "r" (va)                                   \
3694
        :                                                \
3695
    )
3696
3697
    #else  /* !defined(__APPLE__) */
3698
3699
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mulld writes the low 64 bits of the product to vl and mulhdu writes the
 * high 64 bits of the unsigned product to vh.
 * vl and vh are declared read-write ("+r"); va and vb are input registers.
 * No clobbers are declared.
 */
3700
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3701
    __asm__ __volatile__ (                               \
3702
        "mulld  %[l], %[a], %[b]  \n\t"            \
3703
        "mulhdu %[h], %[a], %[b]  \n\t"            \
3704
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3705
        : [a] "r" (va), [b] "r" (vb)                     \
3706
        :                                                \
3707
    )
3708
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The high word is computed first (mulhdu into vh), then the low word
 * (mulld into vl), and finally vo is loaded with zero (li).
 * vl and vh are declared read-write ("+r"); vo is write-only ("=r").
 * No clobbers are declared.
 */
3709
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3710
    __asm__ __volatile__ (                               \
3711
        "mulhdu %[h], %[a], %[b]  \n\t"            \
3712
        "mulld  %[l], %[a], %[b]  \n\t"            \
3713
        "li %[o], 0     \n\t"            \
3714
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3715
        : [a] "r" (va), [b] "r" (vb)                     \
3716
        :                                                \
3717
    )
3718
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is computed into scratch registers 16 (low word) and 17
 * (high word), written as bare register numbers and listed as clobbers.
 * addc adds the low word into vl, adde adds the high word plus carry into
 * vh, and addze adds the final carry into vo.
 */
3719
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3720
    __asm__ __volatile__ (                               \
3721
        "mulld  16, %[a], %[b]    \n\t"            \
3722
        "mulhdu 17, %[a], %[b]    \n\t"            \
3723
        "addc %[l], %[l], 16    \n\t"            \
3724
        "adde %[h], %[h], 17    \n\t"            \
3725
        "addze  %[o], %[o]    \n\t"            \
3726
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3727
        : [a] "r" (va), [b] "r" (vb)                     \
3728
        : "16", "17", "cc"                               \
3729
    )
3730
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is computed into scratch registers 16 (low word) and 17
 * (high word), written as bare register numbers and listed as clobbers,
 * and added into vl/vh with addc/adde. The carry out of vh is not added
 * anywhere.
 */
3731
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3732
    __asm__ __volatile__ (                               \
3733
        "mulld  16, %[a], %[b]    \n\t"            \
3734
        "mulhdu 17, %[a], %[b]    \n\t"            \
3735
        "addc %[l], %[l], 16    \n\t"            \
3736
        "adde %[h], %[h], 17    \n\t"            \
3737
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3738
        : [a] "r" (va), [b] "r" (vb)                     \
3739
        : "16", "17", "cc"                               \
3740
    )
3741
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into scratch registers 16 (low word) and 17
 * (high word), written as bare register numbers and listed as clobbers.
 * The addc/adde/addze sequence is then performed twice, so the carry of
 * each addition is added into vo.
 */
3742
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3743
    __asm__ __volatile__ (                               \
3744
        "mulld  16, %[a], %[b]    \n\t"            \
3745
        "mulhdu 17, %[a], %[b]    \n\t"            \
3746
        "addc %[l], %[l], 16    \n\t"            \
3747
        "adde %[h], %[h], 17    \n\t"            \
3748
        "addze  %[o], %[o]    \n\t"            \
3749
        "addc %[l], %[l], 16    \n\t"            \
3750
        "adde %[h], %[h], 17    \n\t"            \
3751
        "addze  %[o], %[o]    \n\t"            \
3752
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3753
        : [a] "r" (va), [b] "r" (vb)                     \
3754
        : "16", "17", "cc"                               \
3755
    )
3756
/* Multiply va by vb and add double size result twice into: vo | vh | vl
3757
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once into scratch registers 16 (low word) and 17
 * (high word), written as bare register numbers and listed as clobbers.
 * No addze follows the first addc/adde pair; only the carry out of the
 * second addition is added into vo.
3758
 */
3759
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
3760
    __asm__ __volatile__ (                               \
3761
        "mulld  16, %[a], %[b]    \n\t"            \
3762
        "mulhdu 17, %[a], %[b]    \n\t"            \
3763
        "addc %[l], %[l], 16    \n\t"            \
3764
        "adde %[h], %[h], 17    \n\t"            \
3765
        "addc %[l], %[l], 16    \n\t"            \
3766
        "adde %[h], %[h], 17    \n\t"            \
3767
        "addze  %[o], %[o]    \n\t"            \
3768
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3769
        : [a] "r" (va), [b] "r" (vb)                     \
3770
        : "16", "17", "cc"                               \
3771
    )
3772
/* Square va and store double size result in: vh | vl
 *
 * va is used as both multiplicands: mulld writes the low 64 bits of the
 * square to vl and mulhdu writes the high 64 bits to vh.
 * vl and vh are declared read-write ("+r"); no clobbers are declared.
 */
3773
#define SP_ASM_SQR(vl, vh, va)                           \
3774
    __asm__ __volatile__ (                               \
3775
        "mulld  %[l], %[a], %[a]  \n\t"            \
3776
        "mulhdu %[h], %[a], %[a]  \n\t"            \
3777
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3778
        : [a] "r" (va)                                   \
3779
        :                                                \
3780
    )
3781
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is computed into scratch registers 16 (low word) and 17
 * (high word), written as bare register numbers and listed as clobbers.
 * addc/adde add it into vl/vh and addze adds the final carry into vo.
 */
3782
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
3783
    __asm__ __volatile__ (                               \
3784
        "mulld  16, %[a], %[a]    \n\t"            \
3785
        "mulhdu 17, %[a], %[a]    \n\t"            \
3786
        "addc %[l], %[l], 16    \n\t"            \
3787
        "adde %[h], %[h], 17    \n\t"            \
3788
        "addze  %[o], %[o]    \n\t"            \
3789
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3790
        : [a] "r" (va)                                   \
3791
        : "16", "17", "cc"                               \
3792
    )
3793
/* Square va and add double size result into: vh | vl
 *
 * The square is computed into scratch registers 16 (low word) and 17
 * (high word), written as bare register numbers and listed as clobbers,
 * and added into vl/vh with addc/adde. The carry out of vh is not added
 * anywhere.
 */
3794
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
3795
    __asm__ __volatile__ (                               \
3796
        "mulld  16, %[a], %[a]    \n\t"            \
3797
        "mulhdu 17, %[a], %[a]    \n\t"            \
3798
        "addc %[l], %[l], 16    \n\t"            \
3799
        "adde %[h], %[h], 17    \n\t"            \
3800
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3801
        : [a] "r" (va)                                   \
3802
        : "16", "17", "cc"                               \
3803
    )
3804
/* Add va into: vh | vl
 *
 * addc adds va to vl and records the carry; addze then adds that carry
 * to vh. vl and vh are read-write ("+r") and va is an input register.
 */
3805
#define SP_ASM_ADDC(vl, vh, va)                          \
3806
    __asm__ __volatile__ (                               \
3807
        "addc %[l], %[l], %[a]  \n\t"            \
3808
        "addze  %[h], %[h]    \n\t"            \
3809
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3810
        : [a] "r" (va)                                   \
3811
        : "cc"                                           \
3812
    )
3813
/* Sub va from: vh | vl
 *
 * subfc subtracts va from vl and records the carry. Scratch register 16
 * (written as a bare register number and listed as a clobber) is loaded
 * with zero so that the following subfe subtracts only the borrow from vh.
 */
3814
#define SP_ASM_SUBB(vl, vh, va)                          \
3815
    __asm__ __volatile__ (                               \
3816
        "subfc  %[l], %[a], %[l]  \n\t"            \
3817
        "li    16, 0      \n\t"            \
3818
        "subfe %[h], 16, %[h]   \n\t"            \
3819
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3820
        : [a] "r" (va)                                   \
3821
        : "16", "cc"                                     \
3822
    )
3823
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word addition (addc for the low word, adde for the middle and
 * top words) is performed twice in a row. vl, vh and vo are read-write
 * ("+r"); va, vb and vc are input registers. No scratch registers are used.
 */
3824
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
3825
    __asm__ __volatile__ (                               \
3826
        "addc %[l], %[l], %[a]  \n\t"            \
3827
        "adde %[h], %[h], %[b]  \n\t"            \
3828
        "adde %[o], %[o], %[c]  \n\t"            \
3829
        "addc %[l], %[l], %[a]  \n\t"            \
3830
        "adde %[h], %[h], %[b]  \n\t"            \
3831
        "adde %[o], %[o], %[c]  \n\t"            \
3832
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3833
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
3834
        : "cc"                                           \
3835
    )
3836
/* Count leading zeros.
 *
 * cntlzd writes the number of leading zero bits in the 64-bit value va
 * to vn. vn is write-only ("=r"); va is an input register.
 */
3837
#define SP_ASM_LZCNT(va, vn)                             \
3838
    __asm__ __volatile__ (                               \
3839
        "cntlzd %[n], %[a]  \n\t"                    \
3840
        : [n] "=r" (vn)                                  \
3841
        : [a] "r" (va)                                   \
3842
        :                                                \
3843
    )
3844
3845
    #endif /* !defined(__APPLE__) */
3846
3847
/* NOTE(review): presumably records that the assembly helpers for this CPU
 * section have been defined - confirm against users of the macro. */
#define SP_INT_ASM_AVAILABLE
3848
3849
    #endif /* WOLFSSL_SP_PPC64 && SP_WORD_SIZE == 64 */
3850
3851
    #if defined(WOLFSSL_SP_PPC) && SP_WORD_SIZE == 32
3852
/*
3853
 * CPU: PPC 32-bit
3854
 */
3855
3856
    #ifdef __APPLE__
3857
3858
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mullw writes the low 32 bits of the product to vl and mulhwu writes the
 * high 32 bits of the unsigned product to vh.
 * vl and vh are declared read-write ("+r"); va and vb are input registers.
 * No clobbers are declared.
 */
3859
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3860
    __asm__ __volatile__ (                               \
3861
        "mullw  %[l], %[a], %[b]  \n\t"            \
3862
        "mulhwu %[h], %[a], %[b]  \n\t"            \
3863
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3864
        : [a] "r" (va), [b] "r" (vb)                     \
3865
        :                                                \
3866
    )
3867
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The high word is computed first (mulhwu into vh), then the low word
 * (mullw into vl), and finally vo is loaded with zero (li).
 * vl and vh are declared read-write ("+r"); vo is write-only ("=r").
 * The asm statement has no clobber section.
 */
3868
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3869
    __asm__ __volatile__ (                               \
3870
        "mulhwu %[h], %[a], %[b]  \n\t"            \
3871
        "mullw  %[l], %[a], %[b]  \n\t"            \
3872
        "li %[o], 0     \n\t"            \
3873
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3874
        : [a] "r" (va), [b] "r" (vb)                     \
3875
    )
3876
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is computed into scratch registers r16 (low word) and r17
 * (high word), both listed as clobbers. addc adds the low word into vl,
 * adde adds the high word plus carry into vh, and addze adds the final
 * carry into vo.
 */
3877
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3878
    __asm__ __volatile__ (                               \
3879
        "mullw  r16, %[a], %[b]   \n\t"            \
3880
        "mulhwu r17, %[a], %[b]   \n\t"            \
3881
        "addc %[l], %[l], r16   \n\t"            \
3882
        "adde %[h], %[h], r17   \n\t"            \
3883
        "addze  %[o], %[o]    \n\t"            \
3884
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3885
        : [a] "r" (va), [b] "r" (vb)                     \
3886
        : "r16", "r17", "cc"                             \
3887
    )
3888
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is computed into scratch registers r16 (low word) and r17
 * (high word), both listed as clobbers, and added into vl/vh with
 * addc/adde. The carry out of vh is not added anywhere.
 */
3889
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3890
    __asm__ __volatile__ (                               \
3891
        "mullw  r16, %[a], %[b]   \n\t"            \
3892
        "mulhwu r17, %[a], %[b]   \n\t"            \
3893
        "addc %[l], %[l], r16   \n\t"            \
3894
        "adde %[h], %[h], r17   \n\t"            \
3895
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3896
        : [a] "r" (va), [b] "r" (vb)                     \
3897
        : "r16", "r17", "cc"                             \
3898
    )
3899
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into scratch registers r16 (low word) and
 * r17 (high word), both listed as clobbers. The addc/adde/addze sequence is
 * then performed twice, so the carry of each addition is added into vo.
 */
3900
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3901
    __asm__ __volatile__ (                               \
3902
        "mullw  r16, %[a], %[b]   \n\t"            \
3903
        "mulhwu r17, %[a], %[b]   \n\t"            \
3904
        "addc %[l], %[l], r16   \n\t"            \
3905
        "adde %[h], %[h], r17   \n\t"            \
3906
        "addze  %[o], %[o]    \n\t"            \
3907
        "addc %[l], %[l], r16   \n\t"            \
3908
        "adde %[h], %[h], r17   \n\t"            \
3909
        "addze  %[o], %[o]    \n\t"            \
3910
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3911
        : [a] "r" (va), [b] "r" (vb)                     \
3912
        : "r16", "r17", "cc"                             \
3913
    )
3914
/* Multiply va by vb and add double size result twice into: vo | vh | vl
3915
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once into scratch registers r16 (low word) and
 * r17 (high word), both listed as clobbers. No addze follows the first
 * addc/adde pair; only the carry out of the second addition is added
 * into vo.
3916
 */
3917
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
3918
    __asm__ __volatile__ (                               \
3919
        "mullw  r16, %[a], %[b]   \n\t"            \
3920
        "mulhwu r17, %[a], %[b]   \n\t"            \
3921
        "addc %[l], %[l], r16   \n\t"            \
3922
        "adde %[h], %[h], r17   \n\t"            \
3923
        "addc %[l], %[l], r16   \n\t"            \
3924
        "adde %[h], %[h], r17   \n\t"            \
3925
        "addze  %[o], %[o]    \n\t"            \
3926
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3927
        : [a] "r" (va), [b] "r" (vb)                     \
3928
        : "r16", "r17", "cc"                             \
3929
    )
3930
/* Square va and store double size result in: vh | vl
 *
 * va is used as both multiplicands: mullw writes the low 32 bits of the
 * square to vl and mulhwu writes the high 32 bits to vh.
 * vl and vh are declared read-write ("+r"); no clobbers are declared.
 */
3931
#define SP_ASM_SQR(vl, vh, va)                           \
3932
    __asm__ __volatile__ (                               \
3933
        "mullw  %[l], %[a], %[a]  \n\t"            \
3934
        "mulhwu %[h], %[a], %[a]  \n\t"            \
3935
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3936
        : [a] "r" (va)                                   \
3937
        :                                                \
3938
    )
3939
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is held in scratch registers r16 (low word) and r17 (high word).
 * addc/adde add it into vl/vh and addze adds the resulting carry into vo.
 * Clobbers: r16, r17 and the condition codes.
 */
3940
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
3941
    __asm__ __volatile__ (                               \
3942
        "mullw  r16, %[a], %[a]   \n\t"            \
3943
        "mulhwu r17, %[a], %[a]   \n\t"            \
3944
        "addc %[l], %[l], r16   \n\t"            \
3945
        "adde %[h], %[h], r17   \n\t"            \
3946
        "addze  %[o], %[o]    \n\t"            \
3947
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3948
        : [a] "r" (va)                                   \
3949
        : "r16", "r17", "cc"                             \
3950
    )
3951
/* Square va and add double size result into: vh | vl
 *
 * The square is held in scratch registers r16 (low word) and r17 (high word)
 * and added with addc/adde. There is no third word: a carry out of vh is not
 * recorded.
 * Clobbers: r16, r17 and the condition codes.
 */
3952
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
3953
    __asm__ __volatile__ (                               \
3954
        "mullw  r16, %[a], %[a]   \n\t"            \
3955
        "mulhwu r17, %[a], %[a]   \n\t"            \
3956
        "addc %[l], %[l], r16   \n\t"            \
3957
        "adde %[h], %[h], r17   \n\t"            \
3958
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3959
        : [a] "r" (va)                                   \
3960
        : "r16", "r17", "cc"                             \
3961
    )
3962
/* Add va into: vh | vl
 *
 * addc adds va to vl and records the carry; addze then adds that carry into
 * vh. A carry out of vh is not recorded.
 * Clobbers: the condition codes.
 */
3963
#define SP_ASM_ADDC(vl, vh, va)                          \
3964
    __asm__ __volatile__ (                               \
3965
        "addc %[l], %[l], %[a]  \n\t"            \
3966
        "addze  %[h], %[h]    \n\t"            \
3967
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3968
        : [a] "r" (va)                                   \
3969
        : "cc"                                           \
3970
    )
3971
/* Sub va from: vh | vl
 *
 * subfc computes vl - va and records the borrow in the carry bit. r16 is
 * loaded with zero so that subfe can subtract zero plus that borrow from vh.
 * Clobbers: r16 and the condition codes.
 */
3972
#define SP_ASM_SUBB(vl, vh, va)                          \
3973
    __asm__ __volatile__ (                               \
3974
        "subfc  %[l], %[a], %[l]  \n\t"            \
3975
        "li r16, 0      \n\t"            \
3976
        "subfe  %[h], r16, %[h]   \n\t"            \
3977
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3978
        : [a] "r" (va)                                   \
3979
        : "r16", "cc"                                    \
3980
    )
3981
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three word value is not doubled first; it is added in two identical
 * addc/adde/adde passes, each carrying from vl to vh to vo. A carry out of
 * vo is not recorded.
 * Clobbers: the condition codes.
 */
3982
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
3983
    __asm__ __volatile__ (                               \
3984
        "addc %[l], %[l], %[a]  \n\t"            \
3985
        "adde %[h], %[h], %[b]  \n\t"            \
3986
        "adde %[o], %[o], %[c]  \n\t"            \
3987
        "addc %[l], %[l], %[a]  \n\t"            \
3988
        "adde %[h], %[h], %[b]  \n\t"            \
3989
        "adde %[o], %[o], %[c]  \n\t"            \
3990
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3991
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
3992
        : "cc"                                           \
3993
    )
3994
/* Count leading zeros.
 *
 * cntlzw stores in vn the number of leading zero bits of the word va.
 * vn is a write-only output; nothing is clobbered.
 */
3995
#define SP_ASM_LZCNT(va, vn)                             \
3996
    __asm__ __volatile__ (                               \
3997
        "cntlzw %[n], %[a]  \n\t"                    \
3998
        : [n] "=r" (vn)                                  \
3999
        : [a] "r" (va)                                   \
4000
    )
4001
4002
    #else /* !defined(__APPLE__) */
4003
4004
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mullw writes the low word of the product to vl, mulhwu the high word to vh.
 * NOTE(review): vl and vh are only written, yet they use the read-write "+r"
 * constraint - presumably so that neither shares a register with va or vb
 * (vl is written before the inputs are read a second time); confirm.
 */
4005
#define SP_ASM_MUL(vl, vh, va, vb)                       \
4006
    __asm__ __volatile__ (                               \
4007
        "mullw  %[l], %[a], %[b]  \n\t"            \
4008
        "mulhwu %[h], %[a], %[b]  \n\t"            \
4009
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4010
        : [a] "r" (va), [b] "r" (vb)                     \
4011
        :                                                \
4012
    )
4013
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * vh and vl receive the high and low words of the product. vo is cleared by
 * xor-ing it with itself as the last instruction, after va and vb have been
 * read; it is a write-only ("=r") output.
 */
4014
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
4015
    __asm__ __volatile__ (                               \
4016
        "mulhwu %[h], %[a], %[b]  \n\t"            \
4017
        "mullw  %[l], %[a], %[b]  \n\t"            \
4018
        "xor  %[o], %[o], %[o]  \n\t"            \
4019
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
4020
        : [a] "r" (va), [b] "r" (vb)                     \
4021
    )
4022
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is held in scratch registers 16 (low word) and 17 (high word).
 * addc/adde add it into vl/vh and addze adds the resulting carry into vo.
 * Clobbers: registers 16 and 17 and the condition codes.
 */
4023
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
4024
    __asm__ __volatile__ (                               \
4025
        "mullw  16, %[a], %[b]    \n\t"            \
4026
        "mulhwu 17, %[a], %[b]    \n\t"            \
4027
        "addc %[l], %[l], 16    \n\t"            \
4028
        "adde %[h], %[h], 17    \n\t"            \
4029
        "addze  %[o], %[o]    \n\t"            \
4030
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4031
        : [a] "r" (va), [b] "r" (vb)                     \
4032
        : "16", "17", "cc"                               \
4033
    )
4034
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is held in scratch registers 16 (low word) and 17 (high word)
 * and added with addc/adde. There is no third word: a carry out of vh is not
 * recorded.
 * Clobbers: registers 16 and 17 and the condition codes.
 */
4035
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
4036
    __asm__ __volatile__ (                               \
4037
        "mullw  16, %[a], %[b]    \n\t"            \
4038
        "mulhwu 17, %[a], %[b]    \n\t"            \
4039
        "addc %[l], %[l], 16    \n\t"            \
4040
        "adde %[h], %[h], 17    \n\t"            \
4041
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4042
        : [a] "r" (va), [b] "r" (vb)                     \
4043
        : "16", "17", "cc"                               \
4044
    )
4045
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is formed once in scratch registers 16 (low word) and 17 (high
 * word) and is then added in two passes. Each pass carries from vl into vh
 * (addc/adde) and from vh into vo (addze).
 * Clobbers: registers 16 and 17 and the condition codes.
 */
4046
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
4047
    __asm__ __volatile__ (                               \
4048
        "mullw  16, %[a], %[b]    \n\t"            \
4049
        "mulhwu 17, %[a], %[b]    \n\t"            \
4050
        "addc %[l], %[l], 16    \n\t"            \
4051
        "adde %[h], %[h], 17    \n\t"            \
4052
        "addze  %[o], %[o]    \n\t"            \
4053
        "addc %[l], %[l], 16    \n\t"            \
4054
        "adde %[h], %[h], 17    \n\t"            \
4055
        "addze  %[o], %[o]    \n\t"            \
4056
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4057
        : [a] "r" (va), [b] "r" (vb)                     \
4058
        : "16", "17", "cc"                               \
4059
    )
4060
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product (16 low, 17 high) is added to vh | vl twice. Only the second
 * addition is followed by addze, so only its carry reaches vo; the carry out
 * of the first addition is overwritten by the next addc.
 * Clobbers: registers 16 and 17 and the condition codes.
 */
4063
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4064
    __asm__ __volatile__ (                               \
4065
        "mullw  16, %[a], %[b]    \n\t"            \
4066
        "mulhwu 17, %[a], %[b]    \n\t"            \
4067
        "addc %[l], %[l], 16    \n\t"            \
4068
        "adde %[h], %[h], 17    \n\t"            \
4069
        "addc %[l], %[l], 16    \n\t"            \
4070
        "adde %[h], %[h], 17    \n\t"            \
4071
        "addze  %[o], %[o]    \n\t"            \
4072
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4073
        : [a] "r" (va), [b] "r" (vb)                     \
4074
        : "16", "17", "cc"                               \
4075
    )
4076
/* Square va and store double size result in: vh | vl
 *
 * mullw writes the low word of va * va to vl, mulhwu the high word to vh.
 * NOTE(review): vl and vh are only written, yet they use the read-write "+r"
 * constraint - presumably so that neither shares a register with va (vl is
 * written before va is read a second time); confirm.
 */
4077
#define SP_ASM_SQR(vl, vh, va)                           \
4078
    __asm__ __volatile__ (                               \
4079
        "mullw  %[l], %[a], %[a]  \n\t"            \
4080
        "mulhwu %[h], %[a], %[a]  \n\t"            \
4081
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4082
        : [a] "r" (va)                                   \
4083
        :                                                \
4084
    )
4085
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is held in scratch registers 16 (low word) and 17 (high word).
 * addc/adde add it into vl/vh and addze adds the resulting carry into vo.
 * Clobbers: registers 16 and 17 and the condition codes.
 */
4086
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4087
    __asm__ __volatile__ (                               \
4088
        "mullw  16, %[a], %[a]    \n\t"            \
4089
        "mulhwu 17, %[a], %[a]    \n\t"            \
4090
        "addc %[l], %[l], 16    \n\t"            \
4091
        "adde %[h], %[h], 17    \n\t"            \
4092
        "addze  %[o], %[o]    \n\t"            \
4093
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4094
        : [a] "r" (va)                                   \
4095
        : "16", "17", "cc"                               \
4096
    )
4097
/* Square va and add double size result into: vh | vl
 *
 * The square is held in scratch registers 16 (low word) and 17 (high word)
 * and added with addc/adde. There is no third word: a carry out of vh is not
 * recorded.
 * Clobbers: registers 16 and 17 and the condition codes.
 */
4098
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4099
    __asm__ __volatile__ (                               \
4100
        "mullw  16, %[a], %[a]    \n\t"            \
4101
        "mulhwu 17, %[a], %[a]    \n\t"            \
4102
        "addc %[l], %[l], 16    \n\t"            \
4103
        "adde %[h], %[h], 17    \n\t"            \
4104
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4105
        : [a] "r" (va)                                   \
4106
        : "16", "17", "cc"                               \
4107
    )
4108
/* Add va into: vh | vl
 *
 * addc adds va to vl and records the carry; addze then adds that carry into
 * vh. A carry out of vh is not recorded.
 * Clobbers: the condition codes.
 */
4109
#define SP_ASM_ADDC(vl, vh, va)                          \
4110
    __asm__ __volatile__ (                               \
4111
        "addc %[l], %[l], %[a]  \n\t"            \
4112
        "addze  %[h], %[h]    \n\t"            \
4113
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4114
        : [a] "r" (va)                                   \
4115
        : "cc"                                           \
4116
    )
4117
/* Sub va from: vh | vl
 *
 * subfc computes vl - va and records the borrow in the carry bit. Register 16
 * is zeroed (xor with itself) so that subfe can subtract zero plus that
 * borrow from vh.
 * Clobbers: register 16 and the condition codes.
 */
4118
#define SP_ASM_SUBB(vl, vh, va)                          \
4119
    __asm__ __volatile__ (                               \
4120
        "subfc  %[l], %[a], %[l]  \n\t"            \
4121
        "xor  16, 16, 16    \n\t"            \
4122
        "subfe  %[h], 16, %[h]    \n\t"            \
4123
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4124
        : [a] "r" (va)                                   \
4125
        : "16", "cc"                                     \
4126
    )
4127
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three word value is not doubled first; it is added in two identical
 * addc/adde/adde passes, each carrying from vl to vh to vo. A carry out of
 * vo is not recorded.
 * Clobbers: the condition codes.
 */
4128
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4129
    __asm__ __volatile__ (                               \
4130
        "addc %[l], %[l], %[a]  \n\t"            \
4131
        "adde %[h], %[h], %[b]  \n\t"            \
4132
        "adde %[o], %[o], %[c]  \n\t"            \
4133
        "addc %[l], %[l], %[a]  \n\t"            \
4134
        "adde %[h], %[h], %[b]  \n\t"            \
4135
        "adde %[o], %[o], %[c]  \n\t"            \
4136
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4137
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4138
        : "cc"                                           \
4139
    )
4140
/* Count leading zeros.
 *
 * cntlzw stores in vn the number of leading zero bits of the word va.
 * vn is a write-only output; nothing is clobbered.
 */
4141
#define SP_ASM_LZCNT(va, vn)                             \
4142
    __asm__ __volatile__ (                               \
4143
        "cntlzw %[n], %[a]  \n\t"                    \
4144
        : [n] "=r" (vn)                                  \
4145
        : [a] "r" (va)                                   \
4146
    )
4147
4148
    #endif /* !defined(__APPLE__) */
4149
4150
/* Records that the SP_ASM_* macros have been defined for this CPU.
 * NOTE(review): presumably tested later in the file to enable the code built
 * on those macros - confirm.
 */
#define SP_INT_ASM_AVAILABLE
4151
4152
    #endif /* WOLFSSL_SP_PPC && SP_WORD_SIZE == 32 */
4153
4154
    #if defined(WOLFSSL_SP_MIPS64) && SP_WORD_SIZE == 64
4155
/*
4156
 * CPU: MIPS 64-bit
4157
 */
4158
4159
/* Multiply va by vb and store double size result in: vh | vl
 *
 * dmultu leaves the product in the lo/hi register pair; mflo and mfhi copy
 * the low and high halves to vl and vh. vl and vh are declared read-write
 * ("+r") although they are only written here.
 * Clobbers: lo and hi.
 */
4160
#define SP_ASM_MUL(vl, vh, va, vb)                       \
4161
    __asm__ __volatile__ (                               \
4162
        "dmultu %[a], %[b]    \n\t"            \
4163
        "mflo %[l]      \n\t"            \
4164
        "mfhi %[h]      \n\t"            \
4165
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4166
        : [a] "r" (va), [b] "r" (vb)                     \
4167
        : "$lo", "$hi"                                   \
4168
    )
4169
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * dmultu leaves the product in lo/hi; mflo and mfhi copy it to vl and vh.
 * vo is then set to zero by copying register $0 into it; it is a write-only
 * ("=r") output.
 * Clobbers: lo and hi.
 */
4170
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
4171
    __asm__ __volatile__ (                               \
4172
        "dmultu %[a], %[b]    \n\t"            \
4173
        "mflo %[l]      \n\t"            \
4174
        "mfhi %[h]      \n\t"            \
4175
        "move %[o], $0    \n\t"            \
4176
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
4177
        : [a] "r" (va), [b] "r" (vb)                     \
4178
        : "$lo", "$hi"                                   \
4179
    )
4180
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * $10 and $11 hold the low and high halves of the product; $12 holds each
 * carry. A carry is obtained with sltu, which yields 1 when the sum just
 * computed is less than the value that was added. The carry out of vl is
 * added to vh before the high half is, and both additions into vh pass their
 * own carry on to vo.
 * Clobbers: $10, $11, $12, lo and hi.
 */
4181
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
4182
    __asm__ __volatile__ (                               \
4183
        "dmultu %[a], %[b]    \n\t"            \
4184
        "mflo $10     \n\t"            \
4185
        "mfhi $11     \n\t"            \
4186
        "daddu  %[l], %[l], $10   \n\t"            \
4187
        "sltu $12, %[l], $10    \n\t"            \
4188
        "daddu  %[h], %[h], $12   \n\t"            \
4189
        "sltu $12, %[h], $12    \n\t"            \
4190
        "daddu  %[o], %[o], $12   \n\t"            \
4191
        "daddu  %[h], %[h], $11   \n\t"            \
4192
        "sltu $12, %[h], $11    \n\t"            \
4193
        "daddu  %[o], %[o], $12   \n\t"            \
4194
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4195
        : [a] "r" (va), [b] "r" (vb)                     \
4196
        : "$10", "$11", "$12", "$lo", "$hi"              \
4197
    )
4198
/* Multiply va by vb and add double size result into: vh | vl
 *
 * $10 and $11 hold the low and high halves of the product. The carry out of
 * vl is computed with sltu into $12 and added to vh together with the high
 * half. Carries out of vh are not computed.
 * Clobbers: $10, $11, $12, lo and hi.
 */
4199
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
4200
    __asm__ __volatile__ (                               \
4201
        "dmultu %[a], %[b]    \n\t"            \
4202
        "mflo $10     \n\t"            \
4203
        "mfhi $11     \n\t"            \
4204
        "daddu  %[l], %[l], $10   \n\t"            \
4205
        "sltu $12, %[l], $10    \n\t"            \
4206
        "daddu  %[h], %[h], $11   \n\t"            \
4207
        "daddu  %[h], %[h], $12   \n\t"            \
4208
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4209
        : [a] "r" (va), [b] "r" (vb)                     \
4210
        : "$10", "$11", "$12", "$lo", "$hi"              \
4211
    )
4212
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is read once into $10 (low half) and $11 (high half) and the
 * same eight instruction add sequence is then run twice. In each pass the
 * carry out of vl (sltu into $12) is added to vh, then the high half is
 * added to vh, and the carry of each of those two additions is added to vo.
 * Clobbers: $10, $11, $12, lo and hi.
 */
4213
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
4214
    __asm__ __volatile__ (                               \
4215
        "dmultu %[a], %[b]    \n\t"            \
4216
        "mflo $10     \n\t"            \
4217
        "mfhi $11     \n\t"            \
4218
        "daddu  %[l], %[l], $10   \n\t"            \
4219
        "sltu $12, %[l], $10    \n\t"            \
4220
        "daddu  %[h], %[h], $12   \n\t"            \
4221
        "sltu $12, %[h], $12    \n\t"            \
4222
        "daddu  %[o], %[o], $12   \n\t"            \
4223
        "daddu  %[h], %[h], $11   \n\t"            \
4224
        "sltu $12, %[h], $11    \n\t"            \
4225
        "daddu  %[o], %[o], $12   \n\t"            \
4226
        "daddu  %[l], %[l], $10   \n\t"            \
4227
        "sltu $12, %[l], $10    \n\t"            \
4228
        "daddu  %[h], %[h], $12   \n\t"            \
4229
        "sltu $12, %[h], $12    \n\t"            \
4230
        "daddu  %[o], %[o], $12   \n\t"            \
4231
        "daddu  %[h], %[h], $11   \n\t"            \
4232
        "sltu $12, %[h], $11    \n\t"            \
4233
        "daddu  %[o], %[o], $12   \n\t"            \
4234
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4235
        : [a] "r" (va), [b] "r" (vb)                     \
4236
        : "$10", "$11", "$12", "$lo", "$hi"              \
4237
    )
4238
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is read once into $10 (low half) and $11 (high half). The
 * first pass adds it to vh | vl and only computes the carry out of vl. The
 * second pass also computes the carries out of vh (sltu into $12) and adds
 * them to vo.
 * Clobbers: $10, $11, $12, lo and hi.
 */
4241
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4242
    __asm__ __volatile__ (                               \
4243
        "dmultu %[a], %[b]    \n\t"            \
4244
        "mflo $10     \n\t"            \
4245
        "mfhi $11     \n\t"            \
4246
        "daddu  %[l], %[l], $10   \n\t"            \
4247
        "sltu $12, %[l], $10    \n\t"            \
4248
        "daddu  %[h], %[h], $11   \n\t"            \
4249
        "daddu  %[h], %[h], $12   \n\t"            \
4250
        "daddu  %[l], %[l], $10   \n\t"            \
4251
        "sltu $12, %[l], $10    \n\t"            \
4252
        "daddu  %[h], %[h], $12   \n\t"            \
4253
        "sltu $12, %[h], $12    \n\t"            \
4254
        "daddu  %[o], %[o], $12   \n\t"            \
4255
        "daddu  %[h], %[h], $11   \n\t"            \
4256
        "sltu $12, %[h], $11    \n\t"            \
4257
        "daddu  %[o], %[o], $12   \n\t"            \
4258
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4259
        : [a] "r" (va), [b] "r" (vb)                     \
4260
        : "$10", "$11", "$12", "$lo", "$hi"              \
4261
    )
4262
/* Square va and store double size result in: vh | vl
 *
 * dmultu of va with itself leaves the square in the lo/hi register pair;
 * mflo and mfhi copy the low and high halves to vl and vh. vl and vh are
 * declared read-write ("+r") although they are only written here.
 * Clobbers: lo and hi.
 */
4263
#define SP_ASM_SQR(vl, vh, va)                           \
4264
    __asm__ __volatile__ (                               \
4265
        "dmultu %[a], %[a]    \n\t"            \
4266
        "mflo %[l]      \n\t"            \
4267
        "mfhi %[h]      \n\t"            \
4268
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4269
        : [a] "r" (va)                                   \
4270
        : "$lo", "$hi"                                   \
4271
    )
4272
/* Square va and add double size result into: vo | vh | vl
 *
 * $10 and $11 hold the low and high halves of the square; $12 holds each
 * carry, obtained with sltu (1 when the sum is less than the value added).
 * The carry out of vl is added to vh before the high half is, and both
 * additions into vh pass their own carry on to vo.
 * Clobbers: $10, $11, $12, lo and hi.
 */
4273
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4274
    __asm__ __volatile__ (                               \
4275
        "dmultu %[a], %[a]    \n\t"            \
4276
        "mflo $10     \n\t"            \
4277
        "mfhi $11     \n\t"            \
4278
        "daddu  %[l], %[l], $10   \n\t"            \
4279
        "sltu $12, %[l], $10    \n\t"            \
4280
        "daddu  %[h], %[h], $12   \n\t"            \
4281
        "sltu $12, %[h], $12    \n\t"            \
4282
        "daddu  %[o], %[o], $12   \n\t"            \
4283
        "daddu  %[h], %[h], $11   \n\t"            \
4284
        "sltu $12, %[h], $11    \n\t"            \
4285
        "daddu  %[o], %[o], $12   \n\t"            \
4286
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4287
        : [a] "r" (va)                                   \
4288
        : "$10", "$11", "$12", "$lo", "$hi"              \
4289
    )
4290
/* Square va and add double size result into: vh | vl
 *
 * $10 and $11 hold the low and high halves of the square. The carry out of
 * vl is computed with sltu into $12 and added to vh together with the high
 * half. Carries out of vh are not computed.
 * Clobbers: $10, $11, $12, lo and hi.
 */
4291
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4292
    __asm__ __volatile__ (                               \
4293
        "dmultu %[a], %[a]    \n\t"            \
4294
        "mflo $10     \n\t"            \
4295
        "mfhi $11     \n\t"            \
4296
        "daddu  %[l], %[l], $10   \n\t"            \
4297
        "sltu $12, %[l], $10    \n\t"            \
4298
        "daddu  %[h], %[h], $11   \n\t"            \
4299
        "daddu  %[h], %[h], $12   \n\t"            \
4300
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4301
        : [a] "r" (va)                                   \
4302
        : "$10", "$11", "$12", "$lo", "$hi"              \
4303
    )
4304
/* Add va into: vh | vl
 *
 * daddu adds va to vl; sltu sets $12 to 1 when the new vl is less than va,
 * i.e. when the addition wrapped, and that carry is added to vh. A carry out
 * of vh is not computed.
 * Clobbers: $12.
 */
4305
#define SP_ASM_ADDC(vl, vh, va)                          \
4306
    __asm__ __volatile__ (                               \
4307
        "daddu  %[l], %[l], %[a]  \n\t"            \
4308
        "sltu $12, %[l], %[a]   \n\t"            \
4309
        "daddu  %[h], %[h], $12   \n\t"            \
4310
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4311
        : [a] "r" (va)                                   \
4312
        : "$12"                                          \
4313
    )
4314
/* Sub va from: vh | vl
 *
 * The original vl is saved in $12 before the subtraction so that the borrow
 * can be detected afterwards: sltu sets $12 to 1 when the original vl is
 * less than the new vl, i.e. when the subtraction wrapped. That borrow is
 * then subtracted from vh.
 * Clobbers: $12.
 */
4315
#define SP_ASM_SUBB(vl, vh, va)                          \
4316
    __asm__ __volatile__ (                               \
4317
        "move $12, %[l]   \n\t"            \
4318
        "dsubu  %[l], $12, %[a]   \n\t"            \
4319
        "sltu $12, $12, %[l]    \n\t"            \
4320
        "dsubu  %[h], %[h], $12   \n\t"            \
4321
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4322
        : [a] "r" (va)                                   \
4323
        : "$12"                                          \
4324
    )
4325
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three word value is not doubled first; the same nine instruction add
 * sequence is run twice. In each pass va is added to vl, the carry (sltu
 * into $12) is added to vh and its carry to vo, then vb is added to vh and
 * vc plus the carry of that addition are added to vo. Carries out of vo are
 * not computed.
 * Clobbers: $12.
 */
4326
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4327
    __asm__ __volatile__ (                               \
4328
        "daddu  %[l], %[l], %[a]  \n\t"            \
4329
        "sltu $12, %[l], %[a]   \n\t"            \
4330
        "daddu  %[h], %[h], $12   \n\t"            \
4331
        "sltu $12, %[h], $12    \n\t"            \
4332
        "daddu  %[o], %[o], $12   \n\t"            \
4333
        "daddu  %[h], %[h], %[b]  \n\t"            \
4334
        "sltu $12, %[h], %[b]   \n\t"            \
4335
        "daddu  %[o], %[o], %[c]  \n\t"            \
4336
        "daddu  %[o], %[o], $12   \n\t"            \
4337
        "daddu  %[l], %[l], %[a]  \n\t"            \
4338
        "sltu $12, %[l], %[a]   \n\t"            \
4339
        "daddu  %[h], %[h], $12   \n\t"            \
4340
        "sltu $12, %[h], $12    \n\t"            \
4341
        "daddu  %[o], %[o], $12   \n\t"            \
4342
        "daddu  %[h], %[h], %[b]  \n\t"            \
4343
        "sltu $12, %[h], %[b]   \n\t"            \
4344
        "daddu  %[o], %[o], %[c]  \n\t"            \
4345
        "daddu  %[o], %[o], $12   \n\t"            \
4346
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4347
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4348
        : "$12"                                          \
4349
    )
4350
4351
/* Records that the SP_ASM_* macros have been defined for this CPU.
 * NOTE(review): presumably tested later in the file to enable the code built
 * on those macros - confirm.
 */
#define SP_INT_ASM_AVAILABLE
4352
4353
    #endif /* WOLFSSL_SP_MIPS64 && SP_WORD_SIZE == 64 */
4354
4355
    #if defined(WOLFSSL_SP_MIPS) && SP_WORD_SIZE == 32
4356
/*
4357
 * CPU: MIPS 32-bit
4358
 */
4359
4360
/* Multiply va by vb and store double size result in: vh | vl
 *
 * multu leaves the product in the lo/hi register pair; mflo and mfhi copy
 * the low and high words to vl and vh. vl and vh are declared read-write
 * ("+r") although they are only written here.
 * Clobbers: lo and hi.
 */
4361
#define SP_ASM_MUL(vl, vh, va, vb)                       \
4362
    __asm__ __volatile__ (                               \
4363
        "multu  %[a], %[b]    \n\t"            \
4364
        "mflo %[l]      \n\t"            \
4365
        "mfhi %[h]      \n\t"            \
4366
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4367
        : [a] "r" (va), [b] "r" (vb)                     \
4368
        : "%lo", "%hi"                                   \
4369
    )
4370
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * multu leaves the product in lo/hi; mflo and mfhi copy it to vl and vh.
 * vo is then set to zero by copying register $0 into it; it is a write-only
 * ("=r") output.
 * Clobbers: lo and hi.
 */
4371
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
4372
    __asm__ __volatile__ (                               \
4373
        "multu  %[a], %[b]    \n\t"            \
4374
        "mflo %[l]      \n\t"            \
4375
        "mfhi %[h]      \n\t"            \
4376
        "move %[o], $0    \n\t"            \
4377
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
4378
        : [a] "r" (va), [b] "r" (vb)                     \
4379
        : "%lo", "%hi"                                   \
4380
    )
4381
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * $10 and $11 hold the low and high words of the product; $12 holds each
 * carry. A carry is obtained with sltu, which yields 1 when the sum just
 * computed is less than the value that was added. The carry out of vl is
 * added to vh before the high word is, and both additions into vh pass their
 * own carry on to vo.
 * Clobbers: $10, $11, $12, lo and hi.
 */
4382
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
4383
    __asm__ __volatile__ (                               \
4384
        "multu  %[a], %[b]    \n\t"            \
4385
        "mflo $10     \n\t"            \
4386
        "mfhi $11     \n\t"            \
4387
        "addu %[l], %[l], $10   \n\t"            \
4388
        "sltu $12, %[l], $10    \n\t"            \
4389
        "addu %[h], %[h], $12   \n\t"            \
4390
        "sltu $12, %[h], $12    \n\t"            \
4391
        "addu %[o], %[o], $12   \n\t"            \
4392
        "addu %[h], %[h], $11   \n\t"            \
4393
        "sltu $12, %[h], $11    \n\t"            \
4394
        "addu %[o], %[o], $12   \n\t"            \
4395
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4396
        : [a] "r" (va), [b] "r" (vb)                     \
4397
        : "$10", "$11", "$12", "%lo", "%hi"              \
4398
    )
4399
/* Multiply va by vb and add double size result into: vh | vl
 *
 * $10 and $11 hold the low and high words of the product. The carry out of
 * vl is computed with sltu into $12 and added to vh together with the high
 * word. Carries out of vh are not computed.
 * Clobbers: $10, $11, $12, lo and hi.
 */
4400
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
4401
    __asm__ __volatile__ (                               \
4402
        "multu  %[a], %[b]    \n\t"            \
4403
        "mflo $10     \n\t"            \
4404
        "mfhi $11     \n\t"            \
4405
        "addu %[l], %[l], $10   \n\t"            \
4406
        "sltu $12, %[l], $10    \n\t"            \
4407
        "addu %[h], %[h], $11   \n\t"            \
4408
        "addu %[h], %[h], $12   \n\t"            \
4409
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4410
        : [a] "r" (va), [b] "r" (vb)                     \
4411
        : "$10", "$11", "$12", "%lo", "%hi"              \
4412
    )
4413
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is read once into $10 (low word) and $11 (high word) and the
 * same eight instruction add sequence is then run twice. In each pass the
 * carry out of vl (sltu into $12) is added to vh, then the high word is
 * added to vh, and the carry of each of those two additions is added to vo.
 * Clobbers: $10, $11, $12, lo and hi.
 */
4414
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
4415
    __asm__ __volatile__ (                               \
4416
        "multu  %[a], %[b]    \n\t"            \
4417
        "mflo $10     \n\t"            \
4418
        "mfhi $11     \n\t"            \
4419
        "addu %[l], %[l], $10   \n\t"            \
4420
        "sltu $12, %[l], $10    \n\t"            \
4421
        "addu %[h], %[h], $12   \n\t"            \
4422
        "sltu $12, %[h], $12    \n\t"            \
4423
        "addu %[o], %[o], $12   \n\t"            \
4424
        "addu %[h], %[h], $11   \n\t"            \
4425
        "sltu $12, %[h], $11    \n\t"            \
4426
        "addu %[o], %[o], $12   \n\t"            \
4427
        "addu %[l], %[l], $10   \n\t"            \
4428
        "sltu $12, %[l], $10    \n\t"            \
4429
        "addu %[h], %[h], $12   \n\t"            \
4430
        "sltu $12, %[h], $12    \n\t"            \
4431
        "addu %[o], %[o], $12   \n\t"            \
4432
        "addu %[h], %[h], $11   \n\t"            \
4433
        "sltu $12, %[h], $11    \n\t"            \
4434
        "addu %[o], %[o], $12   \n\t"            \
4435
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4436
        : [a] "r" (va), [b] "r" (vb)                     \
4437
        : "$10", "$11", "$12", "%lo", "%hi"              \
4438
    )
4439
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is read once into $10 (low word) and $11 (high word). The
 * first pass adds it to vh | vl and only computes the carry out of vl. The
 * second pass also computes the carries out of vh (sltu into $12) and adds
 * them to vo.
 * Clobbers: $10, $11, $12, lo and hi.
 */
4442
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4443
    __asm__ __volatile__ (                               \
4444
        "multu  %[a], %[b]    \n\t"            \
4445
        "mflo $10     \n\t"            \
4446
        "mfhi $11     \n\t"            \
4447
        "addu %[l], %[l], $10   \n\t"            \
4448
        "sltu $12, %[l], $10    \n\t"            \
4449
        "addu %[h], %[h], $11   \n\t"            \
4450
        "addu %[h], %[h], $12   \n\t"            \
4451
        "addu %[l], %[l], $10   \n\t"            \
4452
        "sltu $12, %[l], $10    \n\t"            \
4453
        "addu %[h], %[h], $12   \n\t"            \
4454
        "sltu $12, %[h], $12    \n\t"            \
4455
        "addu %[o], %[o], $12   \n\t"            \
4456
        "addu %[h], %[h], $11   \n\t"            \
4457
        "sltu $12, %[h], $11    \n\t"            \
4458
        "addu %[o], %[o], $12   \n\t"            \
4459
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4460
        : [a] "r" (va), [b] "r" (vb)                     \
4461
        : "$10", "$11", "$12", "%lo", "%hi"              \
4462
    )
4463
/* Square va and store double size result in: vh | vl
 *
 * multu of va with itself leaves the square in the lo/hi register pair;
 * mflo and mfhi copy the low and high words to vl and vh. vl and vh are
 * declared read-write ("+r") although they are only written here.
 * Clobbers: lo and hi.
 */
4464
#define SP_ASM_SQR(vl, vh, va)                           \
4465
    __asm__ __volatile__ (                               \
4466
        "multu  %[a], %[a]    \n\t"            \
4467
        "mflo %[l]      \n\t"            \
4468
        "mfhi %[h]      \n\t"            \
4469
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4470
        : [a] "r" (va)                                   \
4471
        : "%lo", "%hi"                                   \
4472
    )
4473
/* Square va and add double size result into: vo | vh | vl
 *
 * $10 and $11 hold the low and high words of the square; $12 holds each
 * carry, obtained with sltu (1 when the sum is less than the value added).
 * The carry out of vl is added to vh before the high word is, and both
 * additions into vh pass their own carry on to vo.
 * Clobbers: $10, $11, $12, lo and hi.
 */
4474
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4475
    __asm__ __volatile__ (                               \
4476
        "multu  %[a], %[a]    \n\t"            \
4477
        "mflo $10     \n\t"            \
4478
        "mfhi $11     \n\t"            \
4479
        "addu %[l], %[l], $10   \n\t"            \
4480
        "sltu $12, %[l], $10    \n\t"            \
4481
        "addu %[h], %[h], $12   \n\t"            \
4482
        "sltu $12, %[h], $12    \n\t"            \
4483
        "addu %[o], %[o], $12   \n\t"            \
4484
        "addu %[h], %[h], $11   \n\t"            \
4485
        "sltu $12, %[h], $11    \n\t"            \
4486
        "addu %[o], %[o], $12   \n\t"            \
4487
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4488
        : [a] "r" (va)                                   \
4489
        : "$10", "$11", "$12", "%lo", "%hi"              \
4490
    )
4491
/* Square va and add double size result into: vh | vl
 *
 * $10 and $11 hold the low and high words of the square. The carry out of
 * vl is computed with sltu into $12 and added to vh together with the high
 * word. Carries out of vh are not computed.
 * Clobbers: $10, $11, $12, lo and hi.
 */
4492
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4493
    __asm__ __volatile__ (                               \
4494
        "multu  %[a], %[a]    \n\t"            \
4495
        "mflo $10     \n\t"            \
4496
        "mfhi $11     \n\t"            \
4497
        "addu %[l], %[l], $10   \n\t"            \
4498
        "sltu $12, %[l], $10    \n\t"            \
4499
        "addu %[h], %[h], $11   \n\t"            \
4500
        "addu %[h], %[h], $12   \n\t"            \
4501
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4502
        : [a] "r" (va)                                   \
4503
        : "$10", "$11", "$12", "%lo", "%hi"              \
4504
    )
4505
/* Add va into: vh | vl
 *
 * addu adds va to vl; sltu sets $12 to 1 when the new vl is less than va,
 * i.e. when the addition wrapped, and that carry is added to vh. A carry out
 * of vh is not computed.
 * Clobbers: $12.
 */
4506
#define SP_ASM_ADDC(vl, vh, va)                          \
4507
    __asm__ __volatile__ (                               \
4508
        "addu %[l], %[l], %[a]  \n\t"            \
4509
        "sltu $12, %[l], %[a]   \n\t"            \
4510
        "addu %[h], %[h], $12   \n\t"            \
4511
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4512
        : [a] "r" (va)                                   \
4513
        : "$12"                                          \
4514
    )
4515
/* Sub va from: vh | vl
 *
 * The original vl is saved in $12 before the subtraction so that the borrow
 * can be detected afterwards: sltu sets $12 to 1 when the original vl is
 * less than the new vl, i.e. when the subtraction wrapped. That borrow is
 * then subtracted from vh.
 * Clobbers: $12.
 */
4516
#define SP_ASM_SUBB(vl, vh, va)                          \
4517
    __asm__ __volatile__ (                               \
4518
        "move $12, %[l]   \n\t"            \
4519
        "subu %[l], $12, %[a]   \n\t"            \
4520
        "sltu $12, $12, %[l]    \n\t"            \
4521
        "subu %[h], %[h], $12   \n\t"            \
4522
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4523
        : [a] "r" (va)                                   \
4524
        : "$12"                                          \
4525
    )
4526
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three word value is not doubled first; the same nine instruction add
 * sequence is run twice. In each pass va is added to vl, the carry (sltu
 * into $12) is added to vh and its carry to vo, then vb is added to vh and
 * vc plus the carry of that addition are added to vo. Carries out of vo are
 * not computed.
 * Clobbers: $12.
 */
4527
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4528
    __asm__ __volatile__ (                               \
4529
        "addu %[l], %[l], %[a]  \n\t"            \
4530
        "sltu $12, %[l], %[a]   \n\t"            \
4531
        "addu %[h], %[h], $12   \n\t"            \
4532
        "sltu $12, %[h], $12    \n\t"            \
4533
        "addu %[o], %[o], $12   \n\t"            \
4534
        "addu %[h], %[h], %[b]  \n\t"            \
4535
        "sltu $12, %[h], %[b]   \n\t"            \
4536
        "addu %[o], %[o], %[c]  \n\t"            \
4537
        "addu %[o], %[o], $12   \n\t"            \
4538
        "addu %[l], %[l], %[a]  \n\t"            \
4539
        "sltu $12, %[l], %[a]   \n\t"            \
4540
        "addu %[h], %[h], $12   \n\t"            \
4541
        "sltu $12, %[h], $12    \n\t"            \
4542
        "addu %[o], %[o], $12   \n\t"            \
4543
        "addu %[h], %[h], %[b]  \n\t"            \
4544
        "sltu $12, %[h], %[b]   \n\t"            \
4545
        "addu %[o], %[o], %[c]  \n\t"            \
4546
        "addu %[o], %[o], $12   \n\t"            \
4547
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4548
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4549
        : "$12"                                          \
4550
    )
4551
4552
#define SP_INT_ASM_AVAILABLE
4553
4554
    #endif /* WOLFSSL_SP_MIPS && SP_WORD_SIZE == 32 */
4555
4556
    #if defined(WOLFSSL_SP_RISCV64) && SP_WORD_SIZE == 64
4557
/*
4558
 * CPU: RISC-V 64-bit
4559
 */
4560
4561
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mul produces the low word and mulhu the high word of the unsigned product.
 * NOTE(review): vl is written by mul before mulhu reads va and vb, so vl must
 * not share a register with either input - confirm the "+r" constraints are
 * sufficient for that.
 */
4562
#define SP_ASM_MUL(vl, vh, va, vb)                       \
4563
    __asm__ __volatile__ (                               \
4564
        "mul  %[l], %[a], %[b]  \n\t"            \
4565
        "mulhu  %[h], %[a], %[b]  \n\t"            \
4566
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4567
        : [a] "r" (va), [b] "r" (vb)                     \
4568
        :                                                \
4569
    )
4570
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The product fills vh | vl (mulhu for the high word, mul for the low word)
 * and vo is set to zero as the last instruction.
 */
4571
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
4572
    __asm__ __volatile__ (                               \
4573
        "mulhu  %[h], %[a], %[b]  \n\t"            \
4574
        "mul  %[l], %[a], %[b]  \n\t"            \
4575
        "add  %[o], zero, zero  \n\t"            \
4576
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
4577
        : [a] "r" (va), [b] "r" (vb)                     \
4578
        :                                                \
4579
    )
4580
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is held in a5 (low word) and a6 (high word). Each carry is
 * derived with sltu (sum < addend) and rippled up through vh into vo.
 * Scratch: a5, a6, a7.
 */
4581
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
4582
    __asm__ __volatile__ (                               \
4583
        "mul  a5, %[a], %[b]    \n\t"            \
4584
        "mulhu  a6, %[a], %[b]    \n\t"            \
4585
        "add  %[l], %[l], a5    \n\t"            \
4586
        "sltu a7, %[l], a5    \n\t"            \
4587
        "add  %[h], %[h], a7    \n\t"            \
4588
        "sltu a7, %[h], a7    \n\t"            \
4589
        "add  %[o], %[o], a7    \n\t"            \
4590
        "add  %[h], %[h], a6    \n\t"            \
4591
        "sltu a7, %[h], a6    \n\t"            \
4592
        "add  %[o], %[o], a7    \n\t"            \
4593
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4594
        : [a] "r" (va), [b] "r" (vb)                     \
4595
        : "a5", "a6", "a7"                               \
4596
    )
4597
/* Multiply va by vb and add double size result into: vh | vl
 *
 * Only the carry out of vl is propagated (into vh); a carry out of vh is
 * not captured. Scratch: a5 (low product), a6 (high product), a7 (carry).
 */
4598
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
4599
    __asm__ __volatile__ (                               \
4600
        "mul  a5, %[a], %[b]    \n\t"            \
4601
        "mulhu  a6, %[a], %[b]    \n\t"            \
4602
        "add  %[l], %[l], a5    \n\t"            \
4603
        "sltu a7, %[l], a5    \n\t"            \
4604
        "add  %[h], %[h], a6    \n\t"            \
4605
        "add  %[h], %[h], a7    \n\t"            \
4606
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4607
        : [a] "r" (va), [b] "r" (vb)                     \
4608
        : "a5", "a6", "a7"                               \
4609
    )
4610
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once (a5 low word, a6 high word) and the full
 * three-word add-with-carry sequence is then executed two times.
 * Scratch: a5, a6, a7.
 */
4611
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
4612
    __asm__ __volatile__ (                               \
4613
        "mul  a5, %[a], %[b]    \n\t"            \
4614
        "mulhu  a6, %[a], %[b]    \n\t"            \
4615
        "add  %[l], %[l], a5    \n\t"            \
4616
        "sltu a7, %[l], a5    \n\t"            \
4617
        "add  %[h], %[h], a7    \n\t"            \
4618
        "sltu a7, %[h], a7    \n\t"            \
4619
        "add  %[o], %[o], a7    \n\t"            \
4620
        "add  %[h], %[h], a6    \n\t"            \
4621
        "sltu a7, %[h], a6    \n\t"            \
4622
        "add  %[o], %[o], a7    \n\t"            \
4623
        "add  %[l], %[l], a5    \n\t"            \
4624
        "sltu a7, %[l], a5    \n\t"            \
4625
        "add  %[h], %[h], a7    \n\t"            \
4626
        "sltu a7, %[h], a7    \n\t"            \
4627
        "add  %[o], %[o], a7    \n\t"            \
4628
        "add  %[h], %[h], a6    \n\t"            \
4629
        "sltu a7, %[h], a6    \n\t"            \
4630
        "add  %[o], %[o], a7    \n\t"            \
4631
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4632
        : [a] "r" (va), [b] "r" (vb)                     \
4633
        : "a5", "a6", "a7"                               \
4634
    )
4635
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Because of that assumption the first add does not propagate a carry into
 * vo; only the second add updates vo. Scratch: a5, a6, a7.
 */
4638
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4639
    __asm__ __volatile__ (                               \
4640
        "mul  a5, %[a], %[b]    \n\t"            \
4641
        "mulhu  a6, %[a], %[b]    \n\t"            \
4642
        "add  %[l], %[l], a5    \n\t"            \
4643
        "sltu a7, %[l], a5    \n\t"            \
4644
        "add  %[h], %[h], a6    \n\t"            \
4645
        "add  %[h], %[h], a7    \n\t"            \
4646
        "add  %[l], %[l], a5    \n\t"            \
4647
        "sltu a7, %[l], a5    \n\t"            \
4648
        "add  %[h], %[h], a7    \n\t"            \
4649
        "sltu a7, %[h], a7    \n\t"            \
4650
        "add  %[o], %[o], a7    \n\t"            \
4651
        "add  %[h], %[h], a6    \n\t"            \
4652
        "sltu a7, %[h], a6    \n\t"            \
4653
        "add  %[o], %[o], a7    \n\t"            \
4654
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4655
        : [a] "r" (va), [b] "r" (vb)                     \
4656
        : "a5", "a6", "a7"                               \
4657
    )
4658
/* Square va and store double size result in: vh | vl
 *
 * mul produces the low word and mulhu the high word of va * va.
 * NOTE(review): vl is written by mul before mulhu reads va, so vl must not
 * share a register with va - confirm the "+r" constraints are sufficient.
 */
4659
#define SP_ASM_SQR(vl, vh, va)                           \
4660
    __asm__ __volatile__ (                               \
4661
        "mul  %[l], %[a], %[a]  \n\t"            \
4662
        "mulhu  %[h], %[a], %[a]  \n\t"            \
4663
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4664
        : [a] "r" (va)                                   \
4665
        :                                                \
4666
    )
4667
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is held in a5 (low word) and a6 (high word). Each carry is
 * derived with sltu (sum < addend) and rippled up through vh into vo.
 * Scratch: a5, a6, a7.
 */
4668
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4669
    __asm__ __volatile__ (                               \
4670
        "mul  a5, %[a], %[a]    \n\t"            \
4671
        "mulhu  a6, %[a], %[a]    \n\t"            \
4672
        "add  %[l], %[l], a5    \n\t"            \
4673
        "sltu a7, %[l], a5    \n\t"            \
4674
        "add  %[h], %[h], a7    \n\t"            \
4675
        "sltu a7, %[h], a7    \n\t"            \
4676
        "add  %[o], %[o], a7    \n\t"            \
4677
        "add  %[h], %[h], a6    \n\t"            \
4678
        "sltu a7, %[h], a6    \n\t"            \
4679
        "add  %[o], %[o], a7    \n\t"            \
4680
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4681
        : [a] "r" (va)                                   \
4682
        : "a5", "a6", "a7"                               \
4683
    )
4684
/* Square va and add double size result into: vh | vl
 *
 * Only the carry out of vl is propagated (into vh); a carry out of vh is
 * not captured. Scratch: a5 (low square), a6 (high square), a7 (carry).
 */
4685
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4686
    __asm__ __volatile__ (                               \
4687
        "mul  a5, %[a], %[a]    \n\t"            \
4688
        "mulhu  a6, %[a], %[a]    \n\t"            \
4689
        "add  %[l], %[l], a5    \n\t"            \
4690
        "sltu a7, %[l], a5    \n\t"            \
4691
        "add  %[h], %[h], a6    \n\t"            \
4692
        "add  %[h], %[h], a7    \n\t"            \
4693
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4694
        : [a] "r" (va)                                   \
4695
        : "a5", "a6", "a7"                               \
4696
    )
4697
/* Add va into: vh | vl
 *
 * The carry out of vl is computed as (vl < va) after the add and is added
 * to vh; a carry out of vh is not captured. Scratch: a7.
 */
4698
#define SP_ASM_ADDC(vl, vh, va)                          \
4699
    __asm__ __volatile__ (                               \
4700
        "add  %[l], %[l], %[a]  \n\t"            \
4701
        "sltu a7, %[l], %[a]    \n\t"            \
4702
        "add  %[h], %[h], a7    \n\t"            \
4703
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4704
        : [a] "r" (va)                                   \
4705
        : "a7"                                           \
4706
    )
4707
/* Sub va from: vh | vl
 *
 * The original vl is copied to a7 so that the borrow can be computed as
 * (old vl < new vl) after the subtract; the borrow is then taken from vh.
 * Scratch: a7.
 */
4708
#define SP_ASM_SUBB(vl, vh, va)                          \
4709
    __asm__ __volatile__ (                               \
4710
        "add  a7, %[l], zero    \n\t"            \
4711
        "sub  %[l], a7, %[a]    \n\t"            \
4712
        "sltu a7, a7, %[l]    \n\t"            \
4713
        "sub  %[h], %[h], a7    \n\t"            \
4714
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4715
        : [a] "r" (va)                                   \
4716
        : "a7"                                           \
4717
    )
4718
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The same three-word add-with-carry sequence is executed twice. Each carry
 * is derived with sltu (sum < addend) and rippled into the next word; a
 * carry out of vo is not captured. Scratch: a7.
 */
4719
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4720
    __asm__ __volatile__ (                               \
4721
        "add  %[l], %[l], %[a]  \n\t"            \
4722
        "sltu a7, %[l], %[a]    \n\t"            \
4723
        "add  %[h], %[h], a7    \n\t"            \
4724
        "sltu a7, %[h], a7    \n\t"            \
4725
        "add  %[o], %[o], a7    \n\t"            \
4726
        "add  %[h], %[h], %[b]  \n\t"            \
4727
        "sltu a7, %[h], %[b]    \n\t"            \
4728
        "add  %[o], %[o], %[c]  \n\t"            \
4729
        "add  %[o], %[o], a7    \n\t"            \
4730
        "add  %[l], %[l], %[a]  \n\t"            \
4731
        "sltu a7, %[l], %[a]    \n\t"            \
4732
        "add  %[h], %[h], a7    \n\t"            \
4733
        "sltu a7, %[h], a7    \n\t"            \
4734
        "add  %[o], %[o], a7    \n\t"            \
4735
        "add  %[h], %[h], %[b]  \n\t"            \
4736
        "sltu a7, %[h], %[b]    \n\t"            \
4737
        "add  %[o], %[o], %[c]  \n\t"            \
4738
        "add  %[o], %[o], a7    \n\t"            \
4739
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4740
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4741
        : "a7"                                           \
4742
    )
4743
4744
#define SP_INT_ASM_AVAILABLE
4745
4746
    #endif /* WOLFSSL_SP_RISCV64 && SP_WORD_SIZE == 64 */
4747
4748
    #if defined(WOLFSSL_SP_RISCV32) && SP_WORD_SIZE == 32
4749
/*
4750
 * CPU: RISC-V 32-bit
4751
 */
4752
4753
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mul produces the low word and mulhu the high word of the unsigned product.
 * NOTE(review): vl is written by mul before mulhu reads va and vb, so vl must
 * not share a register with either input - confirm the "+r" constraints are
 * sufficient for that.
 */
4754
#define SP_ASM_MUL(vl, vh, va, vb)                       \
4755
    __asm__ __volatile__ (                               \
4756
        "mul  %[l], %[a], %[b]  \n\t"            \
4757
        "mulhu  %[h], %[a], %[b]  \n\t"            \
4758
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4759
        : [a] "r" (va), [b] "r" (vb)                     \
4760
        :                                                \
4761
    )
4762
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The product fills vh | vl (mulhu for the high word, mul for the low word)
 * and vo is set to zero as the last instruction.
 */
4763
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
4764
    __asm__ __volatile__ (                               \
4765
        "mulhu  %[h], %[a], %[b]  \n\t"            \
4766
        "mul  %[l], %[a], %[b]  \n\t"            \
4767
        "add  %[o], zero, zero  \n\t"            \
4768
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
4769
        : [a] "r" (va), [b] "r" (vb)                     \
4770
        :                                                \
4771
    )
4772
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is held in a5 (low word) and a6 (high word). Each carry is
 * derived with sltu (sum < addend) and rippled up through vh into vo.
 * Scratch: a5, a6, a7.
 */
4773
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
4774
    __asm__ __volatile__ (                               \
4775
        "mul  a5, %[a], %[b]    \n\t"            \
4776
        "mulhu  a6, %[a], %[b]    \n\t"            \
4777
        "add  %[l], %[l], a5    \n\t"            \
4778
        "sltu a7, %[l], a5    \n\t"            \
4779
        "add  %[h], %[h], a7    \n\t"            \
4780
        "sltu a7, %[h], a7    \n\t"            \
4781
        "add  %[o], %[o], a7    \n\t"            \
4782
        "add  %[h], %[h], a6    \n\t"            \
4783
        "sltu a7, %[h], a6    \n\t"            \
4784
        "add  %[o], %[o], a7    \n\t"            \
4785
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4786
        : [a] "r" (va), [b] "r" (vb)                     \
4787
        : "a5", "a6", "a7"                               \
4788
    )
4789
/* Multiply va by vb and add double size result into: vh | vl
 *
 * Only the carry out of vl is propagated (into vh); a carry out of vh is
 * not captured. Scratch: a5 (low product), a6 (high product), a7 (carry).
 */
4790
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
4791
    __asm__ __volatile__ (                               \
4792
        "mul  a5, %[a], %[b]    \n\t"            \
4793
        "mulhu  a6, %[a], %[b]    \n\t"            \
4794
        "add  %[l], %[l], a5    \n\t"            \
4795
        "sltu a7, %[l], a5    \n\t"            \
4796
        "add  %[h], %[h], a6    \n\t"            \
4797
        "add  %[h], %[h], a7    \n\t"            \
4798
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4799
        : [a] "r" (va), [b] "r" (vb)                     \
4800
        : "a5", "a6", "a7"                               \
4801
    )
4802
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once (a5 low word, a6 high word) and the full
 * three-word add-with-carry sequence is then executed two times.
 * Scratch: a5, a6, a7.
 */
4803
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
4804
    __asm__ __volatile__ (                               \
4805
        "mul  a5, %[a], %[b]    \n\t"            \
4806
        "mulhu  a6, %[a], %[b]    \n\t"            \
4807
        "add  %[l], %[l], a5    \n\t"            \
4808
        "sltu a7, %[l], a5    \n\t"            \
4809
        "add  %[h], %[h], a7    \n\t"            \
4810
        "sltu a7, %[h], a7    \n\t"            \
4811
        "add  %[o], %[o], a7    \n\t"            \
4812
        "add  %[h], %[h], a6    \n\t"            \
4813
        "sltu a7, %[h], a6    \n\t"            \
4814
        "add  %[o], %[o], a7    \n\t"            \
4815
        "add  %[l], %[l], a5    \n\t"            \
4816
        "sltu a7, %[l], a5    \n\t"            \
4817
        "add  %[h], %[h], a7    \n\t"            \
4818
        "sltu a7, %[h], a7    \n\t"            \
4819
        "add  %[o], %[o], a7    \n\t"            \
4820
        "add  %[h], %[h], a6    \n\t"            \
4821
        "sltu a7, %[h], a6    \n\t"            \
4822
        "add  %[o], %[o], a7    \n\t"            \
4823
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4824
        : [a] "r" (va), [b] "r" (vb)                     \
4825
        : "a5", "a6", "a7"                               \
4826
    )
4827
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Because of that assumption the first add does not propagate a carry into
 * vo; only the second add updates vo. Scratch: a5, a6, a7.
 */
4830
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4831
    __asm__ __volatile__ (                               \
4832
        "mul  a5, %[a], %[b]    \n\t"            \
4833
        "mulhu  a6, %[a], %[b]    \n\t"            \
4834
        "add  %[l], %[l], a5    \n\t"            \
4835
        "sltu a7, %[l], a5    \n\t"            \
4836
        "add  %[h], %[h], a6    \n\t"            \
4837
        "add  %[h], %[h], a7    \n\t"            \
4838
        "add  %[l], %[l], a5    \n\t"            \
4839
        "sltu a7, %[l], a5    \n\t"            \
4840
        "add  %[h], %[h], a7    \n\t"            \
4841
        "sltu a7, %[h], a7    \n\t"            \
4842
        "add  %[o], %[o], a7    \n\t"            \
4843
        "add  %[h], %[h], a6    \n\t"            \
4844
        "sltu a7, %[h], a6    \n\t"            \
4845
        "add  %[o], %[o], a7    \n\t"            \
4846
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4847
        : [a] "r" (va), [b] "r" (vb)                     \
4848
        : "a5", "a6", "a7"                               \
4849
    )
4850
/* Square va and store double size result in: vh | vl
 *
 * mul produces the low word and mulhu the high word of va * va.
 * NOTE(review): vl is written by mul before mulhu reads va, so vl must not
 * share a register with va - confirm the "+r" constraints are sufficient.
 */
4851
#define SP_ASM_SQR(vl, vh, va)                           \
4852
    __asm__ __volatile__ (                               \
4853
        "mul  %[l], %[a], %[a]  \n\t"            \
4854
        "mulhu  %[h], %[a], %[a]  \n\t"            \
4855
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4856
        : [a] "r" (va)                                   \
4857
        :                                                \
4858
    )
4859
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is held in a5 (low word) and a6 (high word). Each carry is
 * derived with sltu (sum < addend) and rippled up through vh into vo.
 * Scratch: a5, a6, a7.
 */
4860
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4861
    __asm__ __volatile__ (                               \
4862
        "mul  a5, %[a], %[a]    \n\t"            \
4863
        "mulhu  a6, %[a], %[a]    \n\t"            \
4864
        "add  %[l], %[l], a5    \n\t"            \
4865
        "sltu a7, %[l], a5    \n\t"            \
4866
        "add  %[h], %[h], a7    \n\t"            \
4867
        "sltu a7, %[h], a7    \n\t"            \
4868
        "add  %[o], %[o], a7    \n\t"            \
4869
        "add  %[h], %[h], a6    \n\t"            \
4870
        "sltu a7, %[h], a6    \n\t"            \
4871
        "add  %[o], %[o], a7    \n\t"            \
4872
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4873
        : [a] "r" (va)                                   \
4874
        : "a5", "a6", "a7"                               \
4875
    )
4876
/* Square va and add double size result into: vh | vl
 *
 * Only the carry out of vl is propagated (into vh); a carry out of vh is
 * not captured. Scratch: a5 (low square), a6 (high square), a7 (carry).
 */
4877
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4878
    __asm__ __volatile__ (                               \
4879
        "mul  a5, %[a], %[a]    \n\t"            \
4880
        "mulhu  a6, %[a], %[a]    \n\t"            \
4881
        "add  %[l], %[l], a5    \n\t"            \
4882
        "sltu a7, %[l], a5    \n\t"            \
4883
        "add  %[h], %[h], a6    \n\t"            \
4884
        "add  %[h], %[h], a7    \n\t"            \
4885
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4886
        : [a] "r" (va)                                   \
4887
        : "a5", "a6", "a7"                               \
4888
    )
4889
/* Add va into: vh | vl
 *
 * The carry out of vl is computed as (vl < va) after the add and is added
 * to vh; a carry out of vh is not captured. Scratch: a7.
 */
4890
#define SP_ASM_ADDC(vl, vh, va)                          \
4891
    __asm__ __volatile__ (                               \
4892
        "add  %[l], %[l], %[a]  \n\t"            \
4893
        "sltu a7, %[l], %[a]    \n\t"            \
4894
        "add  %[h], %[h], a7    \n\t"            \
4895
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4896
        : [a] "r" (va)                                   \
4897
        : "a7"                                           \
4898
    )
4899
/* Sub va from: vh | vl
 *
 * The original vl is copied to a7 so that the borrow can be computed as
 * (old vl < new vl) after the subtract; the borrow is then taken from vh.
 * Scratch: a7.
 */
4900
#define SP_ASM_SUBB(vl, vh, va)                          \
4901
    __asm__ __volatile__ (                               \
4902
        "add  a7, %[l], zero    \n\t"            \
4903
        "sub  %[l], a7, %[a]    \n\t"            \
4904
        "sltu a7, a7, %[l]    \n\t"            \
4905
        "sub  %[h], %[h], a7    \n\t"            \
4906
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4907
        : [a] "r" (va)                                   \
4908
        : "a7"                                           \
4909
    )
4910
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The same three-word add-with-carry sequence is executed twice. Each carry
 * is derived with sltu (sum < addend) and rippled into the next word; a
 * carry out of vo is not captured. Scratch: a7.
 */
4911
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4912
    __asm__ __volatile__ (                               \
4913
        "add  %[l], %[l], %[a]  \n\t"            \
4914
        "sltu a7, %[l], %[a]    \n\t"            \
4915
        "add  %[h], %[h], a7    \n\t"            \
4916
        "sltu a7, %[h], a7    \n\t"            \
4917
        "add  %[o], %[o], a7    \n\t"            \
4918
        "add  %[h], %[h], %[b]  \n\t"            \
4919
        "sltu a7, %[h], %[b]    \n\t"            \
4920
        "add  %[o], %[o], %[c]  \n\t"            \
4921
        "add  %[o], %[o], a7    \n\t"            \
4922
        "add  %[l], %[l], %[a]  \n\t"            \
4923
        "sltu a7, %[l], %[a]    \n\t"            \
4924
        "add  %[h], %[h], a7    \n\t"            \
4925
        "sltu a7, %[h], a7    \n\t"            \
4926
        "add  %[o], %[o], a7    \n\t"            \
4927
        "add  %[h], %[h], %[b]  \n\t"            \
4928
        "sltu a7, %[h], %[b]    \n\t"            \
4929
        "add  %[o], %[o], %[c]  \n\t"            \
4930
        "add  %[o], %[o], a7    \n\t"            \
4931
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4932
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4933
        : "a7"                                           \
4934
    )
4935
4936
#define SP_INT_ASM_AVAILABLE
4937
4938
    #endif /* WOLFSSL_SP_RISCV32 && SP_WORD_SIZE == 32 */
4939
4940
    #if defined(WOLFSSL_SP_S390X) && SP_WORD_SIZE == 64
4941
/*
4942
 * CPU: IBM s390x
4943
 */
4944
4945
/* Multiply va by vb and store double size result in: vh | vl
 *
 * va is loaded into r1 and multiplied by vb with mlgr on r0; the low word
 * of the result is then copied from r1 and the high word from r0.
 * Scratch: r0, r1.
 */
4946
#define SP_ASM_MUL(vl, vh, va, vb)                       \
4947
    __asm__ __volatile__ (                               \
4948
        "lgr  %%r1, %[a]    \n\t"            \
4949
        "mlgr %%r0, %[b]    \n\t"            \
4950
        "lgr  %[l], %%r1    \n\t"            \
4951
        "lgr  %[h], %%r0    \n\t"            \
4952
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4953
        : [a] "r" (va), [b] "r" (vb)                     \
4954
        : "r0", "r1"                                     \
4955
    )
4956
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The product is formed in r0 (high word) and r1 (low word) and copied to
 * vh | vl; vo is loaded with zero. Scratch: r0, r1.
 */
4957
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
4958
    __asm__ __volatile__ (                               \
4959
        "lgr  %%r1, %[a]    \n\t"            \
4960
        "mlgr %%r0, %[b]    \n\t"            \
4961
        "lghi %[o], 0     \n\t"            \
4962
        "lgr  %[l], %%r1    \n\t"            \
4963
        "lgr  %[h], %%r0    \n\t"            \
4964
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
4965
        : [a] "r" (va), [b] "r" (vb)                     \
4966
        : "r0", "r1"                                     \
4967
    )
4968
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is formed in r0 (high word) and r1 (low word). r10 is loaded
 * with zero so that the final add-with-carry puts only the carry into vo.
 * Scratch: r0, r1, r10; the condition code is modified.
 */
4969
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
4970
    __asm__ __volatile__ (                               \
4971
        "lghi %%r10, 0  \n\t"                    \
4972
        "lgr  %%r1, %[a]    \n\t"            \
4973
        "mlgr %%r0, %[b]    \n\t"            \
4974
        "algr %[l], %%r1  \n\t"                    \
4975
        "alcgr  %[h], %%r0  \n\t"                    \
4976
        "alcgr  %[o], %%r10 \n\t"                    \
4977
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4978
        : [a] "r" (va), [b] "r" (vb)                     \
4979
        : "r0", "r1", "r10", "cc"                        \
4980
    )
4981
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The carry out of vl is added into vh by alcgr; a carry out of vh is not
 * captured. Scratch: r0, r1; the condition code is modified.
 */
4982
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
4983
    __asm__ __volatile__ (                               \
4984
        "lgr  %%r1, %[a]    \n\t"            \
4985
        "mlgr %%r0, %[b]    \n\t"            \
4986
        "algr %[l], %%r1  \n\t"                    \
4987
        "alcgr  %[h], %%r0  \n\t"                    \
4988
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4989
        : [a] "r" (va), [b] "r" (vb)                     \
4990
        : "r0", "r1", "cc"                               \
4991
    )
4992
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once (r0 high word, r1 low word) and the
 * three-word add-with-carry chain is then executed two times, with r10
 * holding zero for the carry-only add into vo.
 * Scratch: r0, r1, r10; the condition code is modified.
 */
4994
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
4995
    __asm__ __volatile__ (                               \
4996
        "lghi %%r10, 0  \n\t"                    \
4997
        "lgr  %%r1, %[a]    \n\t"            \
4998
        "mlgr %%r0, %[b]    \n\t"            \
4999
        "algr %[l], %%r1  \n\t"                    \
5000
        "alcgr  %[h], %%r0  \n\t"                    \
5001
        "alcgr  %[o], %%r10 \n\t"                    \
5002
        "algr %[l], %%r1  \n\t"                    \
5003
        "alcgr  %[h], %%r0  \n\t"                    \
5004
        "alcgr  %[o], %%r10 \n\t"                    \
5005
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
5006
        : [a] "r" (va), [b] "r" (vb)                     \
5007
        : "r0", "r1", "r10", "cc"                        \
5008
    )
5008
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Because of that assumption the first add does not propagate a carry into
 * vo; only the second add updates vo (via r10, which holds zero).
 * Scratch: r0, r1, r10; the condition code is modified.
 */
5011
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
5012
    __asm__ __volatile__ (                               \
5013
        "lghi %%r10, 0  \n\t"                    \
5014
        "lgr  %%r1, %[a]    \n\t"            \
5015
        "mlgr %%r0, %[b]    \n\t"            \
5016
        "algr %[l], %%r1  \n\t"                    \
5017
        "alcgr  %[h], %%r0  \n\t"                    \
5018
        "algr %[l], %%r1  \n\t"                    \
5019
        "alcgr  %[h], %%r0  \n\t"                    \
5020
        "alcgr  %[o], %%r10 \n\t"                    \
5021
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
5022
        : [a] "r" (va), [b] "r" (vb)                     \
5023
        : "r0", "r1", "r10", "cc"                        \
5024
    )
5025
/* Square va and store double size result in: vh | vl
 *
 * va is loaded into r1 and multiplied by itself with mlgr on r0; the low
 * word of the result is then copied from r1 and the high word from r0.
 * Scratch: r0, r1.
 */
5026
#define SP_ASM_SQR(vl, vh, va)                           \
5027
    __asm__ __volatile__ (                               \
5028
        "lgr  %%r1, %[a]    \n\t"            \
5029
        "mlgr %%r0, %%r1    \n\t"            \
5030
        "lgr  %[l], %%r1    \n\t"            \
5031
        "lgr  %[h], %%r0    \n\t"            \
5032
        : [h] "+r" (vh), [l] "+r" (vl)                   \
5033
        : [a] "r" (va)                                   \
5034
        : "r0", "r1"                                     \
5035
    )
5036
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is formed in r0 (high word) and r1 (low word). r10 is loaded
 * with zero so that the final add-with-carry puts only the carry into vo.
 * Scratch: r0, r1, r10; the condition code is modified.
 */
5037
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
5038
    __asm__ __volatile__ (                               \
5039
        "lghi %%r10, 0  \n\t"                    \
5040
        "lgr  %%r1, %[a]    \n\t"            \
5041
        "mlgr %%r0, %%r1    \n\t"            \
5042
        "algr %[l], %%r1  \n\t"                    \
5043
        "alcgr  %[h], %%r0  \n\t"                    \
5044
        "alcgr  %[o], %%r10 \n\t"                    \
5045
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
5046
        : [a] "r" (va)                                   \
5047
        : "r0", "r1", "r10", "cc"                        \
5048
    )
5049
/* Square va and add double size result into: vh | vl
 *
 * The carry out of vl is added into vh by alcgr; a carry out of vh is not
 * captured. Scratch: r0, r1; the condition code is modified.
 */
5050
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
5051
    __asm__ __volatile__ (                               \
5052
        "lgr  %%r1, %[a]    \n\t"            \
5053
        "mlgr %%r0, %%r1    \n\t"            \
5054
        "algr %[l], %%r1  \n\t"                    \
5055
        "alcgr  %[h], %%r0  \n\t"                    \
5056
        : [l] "+r" (vl), [h] "+r" (vh)                   \
5057
        : [a] "r" (va)                                   \
5058
        : "r0", "r1", "cc"                               \
5059
    )
5060
/* Add va into: vh | vl
 *
 * r10 is loaded with zero so that alcgr adds only the carry from the
 * low-word add into vh. Scratch: r10; the condition code is modified.
 */
5061
#define SP_ASM_ADDC(vl, vh, va)                          \
5062
    __asm__ __volatile__ (                               \
5063
        "lghi %%r10, 0  \n\t"                    \
5064
        "algr %[l], %[a]  \n\t"                    \
5065
        "alcgr  %[h], %%r10 \n\t"                    \
5066
        : [l] "+r" (vl), [h] "+r" (vh)                   \
5067
        : [a] "r" (va)                                   \
5068
        : "r10", "cc"                                    \
5069
    )
5070
/* Sub va from: vh | vl
 *
 * r10 is loaded with zero so that slbgr takes only the borrow from the
 * low-word subtract out of vh. Scratch: r10; the condition code is modified.
 */
5071
#define SP_ASM_SUBB(vl, vh, va)                          \
5072
    __asm__ __volatile__ (                               \
5073
        "lghi %%r10, 0  \n\t"                    \
5074
        "slgr %[l], %[a]  \n\t"                    \
5075
        "slbgr  %[h], %%r10 \n\t"                    \
5076
        : [l] "+r" (vl), [h] "+r" (vh)                   \
5077
        : [a] "r" (va)                                   \
5078
        : "r10", "cc"                                    \
5079
    )
5080
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word add-with-carry chain (algr, alcgr, alcgr) is executed
 * twice. No scratch registers are used; the condition code is modified.
 */
5081
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
5082
    __asm__ __volatile__ (                               \
5083
        "algr %[l], %[a]  \n\t"                    \
5084
        "alcgr  %[h], %[b]  \n\t"                    \
5085
        "alcgr  %[o], %[c]  \n\t"                    \
5086
        "algr %[l], %[a]  \n\t"                    \
5087
        "alcgr  %[h], %[b]  \n\t"                    \
5088
        "alcgr  %[o], %[c]  \n\t"                    \
5089
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
5090
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
5091
        : "cc"                                           \
5092
    )
5093
5094
#define SP_INT_ASM_AVAILABLE
5095
5096
    #endif /* WOLFSSL_SP_S390X && SP_WORD_SIZE == 64 */
5097
5098
/* An architecture section above defined the SP_ASM_* macros.
 * Define SQR_MUL_ASM unless SP_INT_NO_ASM is set, and default the _REG
 * variants of add/sub to the plain macros when they were not provided.
 */
#ifdef SP_INT_ASM_AVAILABLE
5099
    #ifndef SP_INT_NO_ASM
5100
        #define SQR_MUL_ASM
5101
    #endif
5102
    #ifndef SP_ASM_ADDC_REG
5103
        #define SP_ASM_ADDC_REG  SP_ASM_ADDC
5104
    #endif /* SP_ASM_ADDC_REG */
5105
    #ifndef SP_ASM_SUBB_REG
5106
        #define SP_ASM_SUBB_REG  SP_ASM_SUBB
5107
    #endif /* SP_ASM_SUBB_REG */
5108
#endif /* SP_INT_ASM_AVAILABLE */
5109
5110
#endif /* !WOLFSSL_NO_ASM */
5111
5112
5113
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
5114
    !defined(NO_DSA) || !defined(NO_DH) || \
5115
    (defined(HAVE_ECC) && defined(HAVE_COMP_KEY)) || defined(OPENSSL_EXTRA) || \
5116
    (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY))
5117
#ifndef WC_NO_CACHE_RESISTANT
5118
#ifdef WC_NO_PTR_INT_CAST
5119
/* Conditionally copy a into r using a mask rather than a branch.
 *
 * Copies the first 'used' digits, the used count and, when
 * WOLFSSL_SP_INT_NEGATIVE is defined, the sign.
 *
 * @param [in]      a     SP integer to copy from.
 * @param [in]      copy  1 to copy a into r, 0 to leave r unchanged.
 * @param [in, out] r     SP integer to conditionally overwrite.
 * @param [in]      used  Number of digits to process.
 */
static void _sp_cond_copy(const sp_int* a, int copy, sp_int* r, sp_size_t used)
{
    /* All bits set when copy is 1, all bits clear when copy is 0. */
    const sp_int_digit select = (sp_int_digit)0 - (sp_int_digit)copy;
    const unsigned int cnt = (unsigned int)used;
    unsigned int idx = 0;

    while (idx < cnt) {
        /* Bits in which r and a differ; applied to r only when selected. */
        sp_int_digit diff = r->dp[idx] ^ a->dp[idx];

        r->dp[idx] ^= diff & select;
        idx++;
    }
    r->used ^= (r->used ^ a->used) & (sp_size_t)select;
#ifdef WOLFSSL_SP_INT_NEGATIVE
    r->sign ^= (r->sign ^ a->sign) & (sp_sign_t)select;
#endif
}
5132
#else
5133
    /* Address-sized masks for constant time operations.
     * Index 0 is all bits clear, index 1 is all bits set.
     */
    const size_t sp_off_on_addr[2] = { (size_t)0, (size_t)-1 };
5139
#endif
5140
#endif
5141
#endif
5142
5143
5144
#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
5145
5146
#ifdef __cplusplus
5147
extern "C" {
5148
#endif
5149
5150
/* Modular exponentiation implementations using Single Precision. */
5151
WOLFSSL_LOCAL int sp_ModExp_1024(const sp_int* base, const sp_int* exp,
5152
    const sp_int* mod, sp_int* res);
5153
WOLFSSL_LOCAL int sp_ModExp_1536(const sp_int* base, const sp_int* exp,
5154
    const sp_int* mod, sp_int* res);
5155
WOLFSSL_LOCAL int sp_ModExp_2048(const sp_int* base, const sp_int* exp,
5156
    const sp_int* mod, sp_int* res);
5157
WOLFSSL_LOCAL int sp_ModExp_3072(const sp_int* base, const sp_int* exp,
5158
    const sp_int* mod, sp_int* res);
5159
WOLFSSL_LOCAL int sp_ModExp_4096(const sp_int* base, const sp_int* exp,
5160
    const sp_int* mod, sp_int* res);
5161
5162
#ifdef __cplusplus
5163
} /* extern "C" */
5164
#endif
5165
5166
#endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */
5167
5168
5169
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
5170
    defined(OPENSSL_ALL)
5171
static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct);
5172
#endif
5173
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
5174
    defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
5175
    defined(OPENSSL_ALL)
5176
static void _sp_mont_setup(const sp_int* m, sp_int_digit* rho);
5177
#endif
5178
5179
5180
/* Set the multi-precision number to zero.
5181
 *
5182
 * Assumes a is not NULL.
5183
 *
5184
 * @param [out] a  SP integer to set to zero.
5185
 */
5186
static void _sp_zero(volatile sp_int* a)
5187
0
{
5188
0
    volatile sp_int_minimal* am = (volatile sp_int_minimal *)a;
5189
5190
0
    am->used = 0;
5191
0
    am->dp[0] = 0;
5192
#ifdef WOLFSSL_SP_INT_NEGATIVE
5193
    am->sign = MP_ZPOS;
5194
#endif
5195
0
}
5196
5197
5198
/* Initialize the multi-precision number to be zero with a given max size.
5199
 *
5200
 * @param [out] a     SP integer.
5201
 * @param [in]  size  Number of words to make available.
5202
 */
5203
static void _sp_init_size(sp_int* a, unsigned int size)
5204
0
{
5205
0
    volatile sp_int_minimal* am = (sp_int_minimal *)a;
5206
5207
#ifdef HAVE_WOLF_BIGINT
5208
    wc_bigint_init((struct WC_BIGINT*)&am->raw);
5209
#endif
5210
0
    _sp_zero((volatile sp_int*)am);
5211
5212
0
    am->size = (sp_size_t)size;
5213
0
}
5214
5215
/* Initialize the multi-precision number to be zero with a given max size.
5216
 *
5217
 * @param [out] a     SP integer.
5218
 * @param [in]  size  Number of words to make available.
5219
 *
5220
 * @return  MP_OKAY on success.
5221
 * @return  MP_VAL when a is NULL.
5222
 */
5223
int sp_init_size(sp_int* a, unsigned int size)
5224
0
{
5225
0
    int err = MP_OKAY;
5226
5227
    /* Validate parameters. Don't use size more than max compiled. */
5228
0
    if ((a == NULL) || ((size == 0) || (size > SP_INT_DIGITS))) {
5229
0
        err = MP_VAL;
5230
0
    }
5231
5232
0
    if (err == MP_OKAY) {
5233
0
        _sp_init_size(a, size);
5234
0
    }
5235
5236
0
    return err;
5237
0
}
5238
5239
/* Initialize the multi-precision number to be zero.
5240
 *
5241
 * @param [out] a  SP integer.
5242
 *
5243
 * @return  MP_OKAY on success.
5244
 * @return  MP_VAL when a is NULL.
5245
 */
5246
int sp_init(sp_int* a)
5247
0
{
5248
0
    int err = MP_OKAY;
5249
5250
    /* Validate parameter. */
5251
0
    if (a == NULL) {
5252
0
        err = MP_VAL;
5253
0
    }
5254
0
    else {
5255
        /* Assume complete sp_int with SP_INT_DIGITS digits. */
5256
0
        _sp_init_size(a, SP_INT_DIGITS);
5257
0
    }
5258
5259
0
    return err;
5260
0
}
5261
5262
#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
5263
/* Initialize up to six multi-precision numbers to be zero.
5264
 *
5265
 * @param [out] n1  SP integer.
5266
 * @param [out] n2  SP integer.
5267
 * @param [out] n3  SP integer.
5268
 * @param [out] n4  SP integer.
5269
 * @param [out] n5  SP integer.
5270
 * @param [out] n6  SP integer.
5271
 *
5272
 * @return  MP_OKAY on success.
5273
 */
5274
int sp_init_multi(sp_int* n1, sp_int* n2, sp_int* n3, sp_int* n4, sp_int* n5,
5275
    sp_int* n6)
5276
0
{
5277
    /* Initialize only those pointers that are valid. */
5278
0
    if (n1 != NULL) {
5279
0
        _sp_init_size(n1, SP_INT_DIGITS);
5280
0
    }
5281
0
    if (n2 != NULL) {
5282
0
        _sp_init_size(n2, SP_INT_DIGITS);
5283
0
    }
5284
0
    if (n3 != NULL) {
5285
0
        _sp_init_size(n3, SP_INT_DIGITS);
5286
0
    }
5287
0
    if (n4 != NULL) {
5288
0
        _sp_init_size(n4, SP_INT_DIGITS);
5289
0
    }
5290
0
    if (n5 != NULL) {
5291
0
        _sp_init_size(n5, SP_INT_DIGITS);
5292
0
    }
5293
0
    if (n6 != NULL) {
5294
0
        _sp_init_size(n6, SP_INT_DIGITS);
5295
0
    }
5296
5297
0
    return MP_OKAY;
5298
0
}
5299
#endif /* !WOLFSSL_RSA_PUBLIC_ONLY || !NO_DH || HAVE_ECC */
5300
5301
/* Free the memory allocated in the multi-precision number.
5302
 *
5303
 * @param [in] a  SP integer.
5304
 */
5305
void sp_free(sp_int* a)
5306
0
{
5307
0
    if (a != NULL) {
5308
    #ifdef HAVE_WOLF_BIGINT
5309
        wc_bigint_free(&a->raw);
5310
    #endif
5311
0
    }
5312
0
}
5313
5314
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5315
    !defined(NO_DH) || defined(HAVE_ECC)
5316
/* Grow multi-precision number to be able to hold l digits.
5317
 * This function does nothing as the number of digits is fixed.
5318
 *
5319
 * @param [in, out] a  SP integer.
5320
 * @param [in]      l  Number of digits to grow to.
5321
 *
5322
 * @return  MP_OKAY on success.
5323
 * @return  MP_MEM when the number of digits requested is more than available.
5324
 */
5325
int sp_grow(sp_int* a, int l)
5326
0
{
5327
0
    int err = MP_OKAY;
5328
5329
    /* Validate parameter. */
5330
0
    if ((a == NULL) || (l < 0)) {
5331
0
        err = MP_VAL;
5332
0
    }
5333
    /* Ensure enough words allocated for grow. */
5334
0
    if ((err == MP_OKAY) && ((unsigned int)l > a->size)) {
5335
0
        err = MP_MEM;
5336
0
    }
5337
0
    if (err == MP_OKAY) {
5338
0
        unsigned int i;
5339
5340
        /* Put in zeros up to the new length. */
5341
0
        for (i = a->used; i < (unsigned int)l; i++) {
5342
0
            a->dp[i] = 0;
5343
0
        }
5344
0
    }
5345
5346
0
    return err;
5347
0
}
5348
#endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH || HAVE_ECC */
5349
5350
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5351
    defined(HAVE_ECC) || defined(WOLFSSL_PUBLIC_MP)
5352
/* Set the multi-precision number to zero.
5353
 *
5354
 * @param [out] a  SP integer to set to zero.
5355
 */
5356
void sp_zero(sp_int* a)
5357
0
{
5358
    /* Make an sp_int with valid pointer zero. */
5359
0
    if (a != NULL) {
5360
0
        _sp_zero(a);
5361
0
    }
5362
0
}
5363
#endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || HAVE_ECC ||
        * WOLFSSL_PUBLIC_MP */
5364
5365
/* Clear the data from the multi-precision number, set to zero and free.
5366
 *
5367
 * @param [out] a  SP integer.
5368
 */
5369
void sp_clear(sp_int* a)
5370
0
{
5371
#ifdef HAVE_FIPS
5372
    sp_forcezero(a);
5373
#else
5374
    /* Clear when valid pointer passed in. */
5375
0
    if (a != NULL) {
5376
0
        unsigned int i;
5377
5378
        /* Only clear the digits being used. */
5379
0
        for (i = 0; i < a->used; i++) {
5380
0
            a->dp[i] = 0;
5381
0
        }
5382
        /* Set back to zero and free. */
5383
0
        _sp_zero(a);
5384
0
        sp_free(a);
5385
0
    }
5386
0
#endif
5387
0
}
5388
5389
#if !defined(NO_RSA) || !defined(NO_DH) || defined(HAVE_ECC) || \
5390
    !defined(NO_DSA) || defined(WOLFSSL_SP_PRIME_GEN)
5391
/* Ensure the data in the multi-precision number is zeroed.
5392
 *
5393
 * Use when security sensitive data needs to be wiped.
5394
 *
5395
 * @param [in] a  SP integer.
5396
 */
5397
void sp_forcezero(sp_int* a)
5398
0
{
5399
    /* Zeroize when a valid pointer passed in. */
5400
0
    if (a != NULL) {
5401
        /* Ensure all data zeroized - data not zeroed when used decreases. */
5402
0
        ForceZero(a->dp, a->size * (word32)SP_WORD_SIZEOF);
5403
        /* Set back to zero. */
5404
    #ifdef HAVE_WOLF_BIGINT
5405
        /* Zeroize the raw data as well. */
5406
        wc_bigint_zero(&a->raw);
5407
    #endif
5408
        /* Make value zero and free. */
5409
0
        _sp_zero(a);
5410
0
        sp_free(a);
5411
0
    }
5412
0
}
5413
#endif /* !NO_RSA || !NO_DH || HAVE_ECC || !NO_DSA || WOLFSSL_SP_PRIME_GEN */
5414
5415
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
5416
    !defined(NO_RSA) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
5417
/* Copy value of multi-precision number a into r.
5418
 *
5419
 * @param [in]  a  SP integer - source.
5420
 * @param [out] r  SP integer - destination.
5421
 */
5422
static void _sp_copy(const sp_int* a, sp_int* r)
5423
0
{
5424
    /* Copy words across. */
5425
0
    if (a->used == 0) {
5426
0
        r->dp[0] = 0;
5427
0
    }
5428
0
    else {
5429
0
        XMEMCPY(r->dp, a->dp, a->used * (word32)SP_WORD_SIZEOF);
5430
0
    }
5431
    /* Set number of used words in result. */
5432
0
    r->used = a->used;/* // NOLINT(clang-analyzer-core.uninitialized.Assign) */
5433
#ifdef WOLFSSL_SP_INT_NEGATIVE
5434
    /* Set sign of result. */
5435
    r->sign = a->sign;/* // NOLINT(clang-analyzer-core.uninitialized.Assign) */
5436
#endif
5437
0
}
5438
5439
/* Copy value of multi-precision number a into r.
5440
 *
5441
 * @param [in]  a  SP integer - source.
5442
 * @param [out] r  SP integer - destination.
5443
 *
5444
 * @return  MP_OKAY on success.
5445
 */
5446
int sp_copy(const sp_int* a, sp_int* r)
5447
0
{
5448
0
    int err = MP_OKAY;
5449
5450
    /* Validate parameters. */
5451
0
    if ((a == NULL) || (r == NULL)) {
5452
0
        err = MP_VAL;
5453
0
    }
5454
    /* Only copy if different pointers. */
5455
0
    if (a != r) {
5456
        /* Validated space in result. */
5457
0
        if ((err == MP_OKAY) && (a->used > r->size)) {
5458
0
            err = MP_VAL;
5459
0
        }
5460
0
        if (err == MP_OKAY) {
5461
0
            _sp_copy(a, r);
5462
0
        }
5463
0
    }
5464
5465
0
    return err;
5466
0
}
5467
#endif
5468
5469
#if ((defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
5470
      !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
5471
     defined(OPENSSL_ALL)) && defined(WC_PROTECT_ENCRYPTED_MEM)
5472
5473
/* Copy 2 numbers into two results based on y. Copy a fixed number of digits.
5474
 *
5475
 * Constant time implementation.
5476
 * When y is 0, r1 = a2 and r2 = a1.
5477
 * When y is 1, r1 = a1 and r2 = a2.
5478
 *
5479
 * @param [in]  a1    First number to copy.
5480
 * @param [in]  a2    Second number to copy.
5481
 * @param [out] r1    First result number to copy into.
5482
 * @param [out] r2    Second result number to copy into.
5483
 * @param [in]  y     Indicates which number goes into which result number.
5484
 * @param [in]  used  Number of digits to copy.
5485
 */
5486
static void _sp_copy_2_ct(const sp_int* a1, const sp_int* a2, sp_int* r1,
5487
    sp_int* r2, int y, unsigned int used)
5488
{
5489
    unsigned int i;
5490
5491
    /* Copy data - constant time. */
5492
    for (i = 0; i < used; i++) {
5493
        r1->dp[i] = (a1->dp[i] & ((sp_int_digit)wc_off_on_addr[y  ])) +
5494
                    (a2->dp[i] & ((sp_int_digit)wc_off_on_addr[y^1]));
5495
        r2->dp[i] = (a1->dp[i] & ((sp_int_digit)wc_off_on_addr[y^1])) +
5496
                    (a2->dp[i] & ((sp_int_digit)wc_off_on_addr[y  ]));
5497
    }
5498
    /* Copy used. */
5499
    r1->used = (a1->used & ((int)wc_off_on_addr[y  ])) +
5500
               (a2->used & ((int)wc_off_on_addr[y^1]));
5501
    r2->used = (a1->used & ((int)wc_off_on_addr[y^1])) +
5502
               (a2->used & ((int)wc_off_on_addr[y  ]));
5503
#ifdef WOLFSSL_SP_INT_NEGATIVE
5504
    /* Copy sign. */
5505
    r1->sign = (a1->sign & ((int)wc_off_on_addr[y  ])) +
5506
               (a2->sign & ((int)wc_off_on_addr[y^1]));
5507
    r2->sign = (a1->sign & ((int)wc_off_on_addr[y^1])) +
5508
               (a2->sign & ((int)wc_off_on_addr[y  ]));
5509
#endif
5510
}
5511
5512
#endif
5513
5514
#if defined(WOLFSSL_SP_MATH_ALL) || (defined(HAVE_ECC) && defined(FP_ECC))
5515
/* Initializes r and copies in value from a.
5516
 *
5517
 * @param [out] r  SP integer - destination.
5518
 * @param [in]  a  SP integer - source.
5519
 *
5520
 * @return  MP_OKAY on success.
5521
 * @return  MP_VAL when a or r is NULL.
5522
 */
5523
int sp_init_copy(sp_int* r, const sp_int* a)
5524
0
{
5525
0
    int err;
5526
5527
    /* Initialize r and copy value in a into it. */
5528
0
    err = sp_init(r);
5529
0
    if (err == MP_OKAY) {
5530
0
        err = sp_copy(a, r);
5531
0
    }
5532
5533
0
    return err;
5534
0
}
5535
#endif /* WOLFSSL_SP_MATH_ALL || (HAVE_ECC && FP_ECC) */
5536
5537
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5538
    !defined(NO_DH) || !defined(NO_DSA)
5539
/* Exchange the values in a and b.
5540
 *
5541
 * Avoid using this API as three copy operations are performed.
5542
 *
5543
 * @param [in, out] a  SP integer to swap.
5544
 * @param [in, out] b  SP integer to swap.
5545
 *
5546
 * @return  MP_OKAY on success.
5547
 * @return  MP_VAL when a or b is NULL.
5548
 * @return  MP_MEM when dynamic memory allocation fails.
5549
 */
5550
int sp_exch(sp_int* a, sp_int* b)
5551
0
{
5552
0
    int err = MP_OKAY;
5553
5554
    /* Validate parameters. */
5555
0
    if ((a == NULL) || (b == NULL)) {
5556
0
        err = MP_VAL;
5557
0
    }
5558
    /* Check space for a in b and b in a. */
5559
0
    if ((err == MP_OKAY) && ((a->size < b->used) || (b->size < a->used))) {
5560
0
        err = MP_VAL;
5561
0
    }
5562
5563
0
    if (err == MP_OKAY) {
5564
        /* Declare temporary for swapping. */
5565
0
        DECL_SP_INT(t, a->used);
5566
5567
        /* Create temporary for swapping. */
5568
0
        ALLOC_SP_INT(t, a->used, err, NULL);
5569
0
        if (err == MP_OKAY) {
5570
            /* Cache allocated size of a and b. */
5571
0
            sp_size_t asize = a->size;
5572
0
            sp_size_t bsize = b->size;
5573
            /* Copy all of SP int: t <- a, a <- b, b <- t. */
5574
0
            XMEMCPY(t, a, MP_INT_SIZEOF(a->used));
5575
0
            XMEMCPY(a, b, MP_INT_SIZEOF(b->used));
5576
0
            XMEMCPY(b, t, MP_INT_SIZEOF(t->used));
5577
            /* Put back size of a and b. */
5578
0
            a->size = asize;
5579
0
            b->size = bsize;
5580
0
        }
5581
5582
0
        FREE_SP_INT(t, NULL);
5583
0
    }
5584
5585
0
    return err;
5586
0
}
5587
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
5588
        * !NO_DSA */
5589
5590
#if defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT) && \
5591
    !defined(WC_NO_CACHE_RESISTANT)
5592
/* Conditional swap of SP int values in constant time.
5593
 *
5594
 * @param [in, out] a     First SP int to conditionally swap.
5595
 * @param [in, out] b     Second SP int to conditionally swap.
5596
 * @param [in]      cnt   Count of words to copy.
5597
 * @param [in]      swap  When value is 1 then swap.
5598
 * @param [in, out] t     Temporary SP int to use in swap.
5599
 * @return  MP_OKAY on success.
5600
 * @return  MP_MEM when dynamic memory allocation fails.
5601
 */
5602
int sp_cond_swap_ct_ex(sp_int* a, sp_int* b, int cnt, int swap, sp_int* t)
5603
0
{
5604
0
    unsigned int i;
5605
0
    volatile sp_int_digit mask = (sp_int_digit)0 - (sp_int_digit)swap;
5606
5607
    /* XOR other fields in sp_int into temp - mask set when swapping. */
5608
0
    t->used = (a->used ^ b->used) & (sp_size_t)mask;
5609
#ifdef WOLFSSL_SP_INT_NEGATIVE
5610
    t->sign = (a->sign ^ b->sign) & (sp_uint8)mask;
5611
#endif
5612
5613
    /* XOR requested words into temp - mask set when swapping. */
5614
0
    for (i = 0; i < (unsigned int)cnt; i++) {
5615
0
        t->dp[i] = (a->dp[i] ^ b->dp[i]) & mask;
5616
0
    }
5617
5618
    /* XOR temporary - when mask set then result will be b. */
5619
0
    a->used ^= t->used;
5620
#ifdef WOLFSSL_SP_INT_NEGATIVE
5621
    a->sign ^= t->sign;
5622
#endif
5623
0
    for (i = 0; i < (unsigned int)cnt; i++) {
5624
0
        a->dp[i] ^= t->dp[i];
5625
0
    }
5626
5627
    /* XOR temporary - when mask set then result will be a. */
5628
0
    b->used ^= t->used;
5629
#ifdef WOLFSSL_SP_INT_NEGATIVE
5630
    b->sign ^= t->sign;
5631
#endif
5632
0
    for (i = 0; i < (unsigned int)cnt; i++) {
5633
0
        b->dp[i] ^= t->dp[i];
5634
0
    }
5635
5636
0
    return MP_OKAY;
5637
0
}
5638
5639
/* Conditional swap of SP int values in constant time.
5640
 *
5641
 * @param [in] a     First SP int to conditionally swap.
5642
 * @param [in] b     Second SP int to conditionally swap.
5643
 * @param [in] cnt   Count of words to copy.
5644
 * @param [in] swap  When value is 1 then swap.
5645
 * @return  MP_OKAY on success.
5646
 * @return  MP_MEM when dynamic memory allocation fails.
5647
 */
5648
int sp_cond_swap_ct(sp_int* a, sp_int* b, int cnt, int swap)
5649
0
{
5650
0
    int err = MP_OKAY;
5651
0
    DECL_SP_INT(t, (size_t)cnt);
5652
5653
    /* Allocate temporary to hold masked xor of a and b. */
5654
0
    ALLOC_SP_INT(t, cnt, err, NULL);
5655
5656
0
    if (err == MP_OKAY) {
5657
0
        err = sp_cond_swap_ct_ex(a, b, cnt, swap, t);
5658
0
        FREE_SP_INT(t, NULL);
5659
0
    }
5660
5661
0
    return err;
5662
0
}
5663
#endif /* HAVE_ECC && ECC_TIMING_RESISTANT && !WC_NO_CACHE_RESISTANT */
5664
5665
#ifdef WOLFSSL_SP_INT_NEGATIVE
5666
/* Calculate the absolute value of the multi-precision number.
5667
 *
5668
 * @param [in]  a  SP integer to calculate absolute value of.
5669
 * @param [out] r  SP integer to hold result.
5670
 *
5671
 * @return  MP_OKAY on success.
5672
 * @return  MP_VAL when a or r is NULL.
5673
 */
5674
int sp_abs(const sp_int* a, sp_int* r)
5675
{
5676
    int err;
5677
5678
    /* Copy a into r - copy fails when r is NULL. */
5679
    err = sp_copy(a, r);
5680
    if (err == MP_OKAY) {
5681
        r->sign = MP_ZPOS;
5682
    }
5683
5684
    return err;
5685
}
5686
#endif /* WOLFSSL_SP_INT_NEGATIVE */
5687
5688
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
5689
    !defined(NO_RSA)
5690
/* Compare absolute value of two multi-precision numbers.
5691
 *
5692
 * @param [in] a  SP integer.
5693
 * @param [in] b  SP integer.
5694
 *
5695
 * @return  MP_GT when a is greater than b.
5696
 * @return  MP_LT when a is less than b.
5697
 * @return  MP_EQ when a is equal to b.
5698
 */
5699
static int _sp_cmp_abs(const sp_int* a, const sp_int* b)
5700
0
{
5701
0
    int ret = MP_EQ;
5702
5703
    /* Check number of words first. */
5704
0
    if (a->used > b->used) {
5705
0
        ret = MP_GT;
5706
0
    }
5707
0
    else if (a->used < b->used) {
5708
0
        ret = MP_LT;
5709
0
    }
5710
0
    else {
5711
0
        int i;
5712
5713
        /* Starting from most significant word, compare words.
5714
         * Stop when different and set comparison return.
5715
         */
5716
0
        for (i = (int)a->used - 1; i >= 0; i--) {
5717
0
            if (a->dp[i] > b->dp[i]) {
5718
0
                ret = MP_GT;
5719
0
                break;
5720
0
            }
5721
0
            else if (a->dp[i] < b->dp[i]) {
5722
0
                ret = MP_LT;
5723
0
                break;
5724
0
            }
5725
0
        }
5726
        /* If we made to the end then ret is MP_EQ from initialization. */
5727
0
    }
5728
5729
0
    return ret;
5730
0
}
5731
#endif
5732
5733
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
5734
/* Compare absolute value of two multi-precision numbers.
5735
 *
5736
 * Pointers are compared such that NULL is less than non-NULL.
5737
 *
5738
 * @param [in] a  SP integer.
5739
 * @param [in] b  SP integer.
5740
 *
5741
 * @return  MP_GT when a is greater than b.
5742
 * @return  MP_LT when a is less than b.
5743
 * @return  MP_EQ when a is equal to b.
5744
 */
5745
int sp_cmp_mag(const sp_int* a, const sp_int* b)
5746
0
{
5747
0
    int ret;
5748
5749
    /* Do pointer checks first. Both NULL returns equal. */
5750
0
    if (a == b) {
5751
0
        ret = MP_EQ;
5752
0
    }
5753
    /* Nothing is smaller than something. */
5754
0
    else if (a == NULL) {
5755
0
        ret = MP_LT;
5756
0
    }
5757
    /* Something is larger than nothing. */
5758
0
    else if (b == NULL) {
5759
0
        ret = MP_GT;
5760
0
    }
5761
0
    else
5762
0
    {
5763
        /* Compare values - a and b are not NULL. */
5764
0
        ret = _sp_cmp_abs(a, b);
5765
0
    }
5766
5767
0
    return ret;
5768
0
}
5769
#endif
5770
5771
#if defined(WOLFSSL_SP_MATH_ALL) || defined(HAVE_ECC) || !defined(NO_DSA) || \
5772
    defined(OPENSSL_EXTRA) || !defined(NO_DH) || !defined(NO_RSA)
5773
/* Compare two multi-precision numbers.
5774
 *
5775
 * Assumes a and b are not NULL.
5776
 *
5777
 * @param [in] a  SP integer.
5778
 * @param [in] b  SP integer.
5779
 *
5780
 * @return  MP_GT when a is greater than b.
5781
 * @return  MP_LT when a is less than b.
5782
 * @return  MP_EQ when a is equal to b.
5783
 */
5784
static int _sp_cmp(const sp_int* a, const sp_int* b)
5785
0
{
5786
0
    int ret;
5787
5788
#ifdef WOLFSSL_SP_INT_NEGATIVE
5789
    /* Check sign first. */
5790
    if (a->sign > b->sign) {
5791
        ret = MP_LT;
5792
    }
5793
    else if (a->sign < b->sign) {
5794
        ret = MP_GT;
5795
    }
5796
    else /* (a->sign == b->sign) */ {
5797
#endif
5798
        /* Compare values. */
5799
0
        ret = _sp_cmp_abs(a, b);
5800
#ifdef WOLFSSL_SP_INT_NEGATIVE
5801
        if (a->sign == MP_NEG) {
5802
            /* MP_GT = 1, MP_LT = -1, MP_EQ = 0
5803
             * Swapping MP_GT and MP_LT results.
5804
             */
5805
            ret = -ret;
5806
        }
5807
    }
5808
#endif
5809
5810
0
    return ret;
5811
0
}
5812
#endif
5813
5814
#if !defined(NO_RSA) || !defined(NO_DSA) || defined(HAVE_ECC) || \
5815
    !defined(NO_DH) || defined(WOLFSSL_SP_MATH_ALL)
5816
/* Compare two multi-precision numbers.
5817
 *
5818
 * Pointers are compared such that NULL is less than non-NULL.
5819
 *
5820
 * @param [in] a  SP integer.
5821
 * @param [in] b  SP integer.
5822
 *
5823
 * @return  MP_GT when a is greater than b.
5824
 * @return  MP_LT when a is less than b.
5825
 * @return  MP_EQ when a is equal to b.
5826
 */
5827
int sp_cmp(const sp_int* a, const sp_int* b)
5828
0
{
5829
0
    int ret;
5830
5831
    /* Check pointers first. Both NULL returns equal. */
5832
0
    if (a == b) {
5833
0
        ret = MP_EQ;
5834
0
    }
5835
    /* Nothing is smaller than something. */
5836
0
    else if (a == NULL) {
5837
0
        ret = MP_LT;
5838
0
    }
5839
    /* Something is larger than nothing. */
5840
0
    else if (b == NULL) {
5841
0
        ret = MP_GT;
5842
0
    }
5843
0
    else
5844
0
    {
5845
        /* Compare values - a and b are not NULL. */
5846
0
        ret = _sp_cmp(a, b);
5847
0
    }
5848
5849
0
    return ret;
5850
0
}
5851
#endif
5852
5853
#if defined(HAVE_ECC) && !defined(WC_NO_RNG) && \
5854
    defined(WOLFSSL_ECC_GEN_REJECT_SAMPLING)
5855
/* Compare two multi-precision numbers in constant time.
5856
 *
5857
 * Assumes a and b are not NULL.
5858
 * Assumes a and b are positive.
5859
 *
5860
 * @param [in] a  SP integer.
5861
 * @param [in] b  SP integer.
5862
 * @param [in] n  Number of digits to compare.
5863
 *
5864
 * @return  MP_GT when a is greater than b.
5865
 * @return  MP_LT when a is less than b.
5866
 * @return  MP_EQ when a is equal to b.
5867
 */
5868
static int _sp_cmp_ct(const sp_int* a, const sp_int* b, unsigned int n)
5869
{
5870
    int ret = MP_EQ;
5871
    int i;
5872
    volatile int mask = -1;
5873
5874
    for (i = n - 1; i >= 0; i--) {
5875
        sp_int_digit ad = a->dp[i] & ((sp_int_digit)0 - (i < (int)a->used));
5876
        sp_int_digit bd = b->dp[i] & ((sp_int_digit)0 - (i < (int)b->used));
5877
5878
        ret |= mask & ((0 - (ad < bd)) & MP_LT);
5879
        mask &= 0 - (ret == MP_EQ);
5880
        ret |= mask & ((0 - (ad > bd)) & MP_GT);
5881
        mask &= 0 - (ret == MP_EQ);
5882
    }
5883
5884
    return ret;
5885
}
5886
5887
/* Compare two multi-precision numbers in constant time.
5888
 *
5889
 * Pointers are compared such that NULL is less than non-NULL.
5890
 * Assumes a and b are positive.
5891
 * Assumes a and b have had n digits set at some point.
5892
 *
5893
 * @param [in] a  SP integer.
5894
 * @param [in] b  SP integer.
5895
 * @param [in] n  Number of digits to compare.
5896
 *
5897
 * @return  MP_GT when a is greater than b.
5898
 * @return  MP_LT when a is less than b.
5899
 * @return  MP_EQ when a is equal to b.
5900
 */
5901
int sp_cmp_ct(const sp_int* a, const sp_int* b, unsigned int n)
5902
{
5903
    int ret;
5904
5905
    /* Check pointers first. Both NULL returns equal. */
5906
    if (a == b) {
5907
        ret = MP_EQ;
5908
    }
5909
    /* Nothing is smaller than something. */
5910
    else if (a == NULL) {
5911
        ret = MP_LT;
5912
    }
5913
    /* Something is larger than nothing. */
5914
    else if (b == NULL) {
5915
        ret = MP_GT;
5916
    }
5917
    else
5918
    {
5919
        /* Compare values - a and b are not NULL. */
5920
        ret = _sp_cmp_ct(a, b, n);
5921
    }
5922
5923
    return ret;
5924
}
5925
#endif /* HAVE_ECC && !WC_NO_RNG && WOLFSSL_ECC_GEN_REJECT_SAMPLING */
5926
5927
/*************************
5928
 * Bit check/set functions
5929
 *************************/
5930
5931
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5932
    ((defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_SP_SM2)) && \
5933
     defined(HAVE_ECC)) || defined(OPENSSL_EXTRA) || defined(WOLFSSL_PUBLIC_MP)
5934
/* Check if a bit is set
5935
 *
5936
 * When a is NULL, result is 0.
5937
 *
5938
 * @param [in] a  SP integer.
5939
 * @param [in] b  Bit position to check.
5940
 *
5941
 * @return  0 when bit is not set.
5942
 * @return  1 when bit is set.
5943
 */
5944
int sp_is_bit_set(const sp_int* a, unsigned int b)
5945
0
{
5946
0
    int ret = 0;
5947
    /* Index of word. */
5948
0
    unsigned int i = b >> SP_WORD_SHIFT;
5949
5950
    /* Check parameters. */
5951
0
    if ((a != NULL) && (i < a->used)) {
5952
        /* Shift amount to get bit down to index 0. */
5953
0
        unsigned int s = b & SP_WORD_MASK;
5954
5955
        /* Get and mask bit. */
5956
0
        ret = (int)((a->dp[i] >> s) & (sp_int_digit)1);
5957
0
    }
5958
5959
0
    return ret;
5960
0
}
5961
#endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) ||
        * ((WOLFSSL_SP_MATH_ALL || WOLFSSL_SP_SM2) && HAVE_ECC) ||
        * OPENSSL_EXTRA || WOLFSSL_PUBLIC_MP */
5963
5964
/* Count the number of bits in the multi-precision number.
5965
 *
5966
 * When a is NULL, result is 0.
5967
 *
5968
 * @param [in] a  SP integer.
5969
 *
5970
 * @return  Number of bits in the SP integer value.
5971
 */
5972
int sp_count_bits(const sp_int* a)
5973
0
{
5974
0
    int n = -1;
5975
5976
    /* Check parameter. */
5977
0
    if ((a != NULL) && (a->used > 0)) {
5978
        /* Get index of last word. */
5979
0
        n = (int)(a->used - 1);
5980
        /* Don't count leading zeros. */
5981
0
        while ((n >= 0) && (a->dp[n] == 0)) {
5982
0
            n--;
5983
0
        }
5984
0
    }
5985
5986
    /* -1 indicates SP integer value was zero. */
5987
0
    if (n < 0) {
5988
0
        n = 0;
5989
0
    }
5990
0
    else {
5991
        /* Get the most significant word. */
5992
0
        sp_int_digit d = a->dp[n];
5993
        /* Count of bits up to last word. */
5994
0
        n *= SP_WORD_SIZE;
5995
5996
0
    #ifdef SP_ASM_HI_BIT_SET_IDX
5997
0
        {
5998
0
            sp_int_digit hi;
5999
            /* Get index of highest set bit. */
6000
0
            SP_ASM_HI_BIT_SET_IDX(d, hi);
6001
            /* Add bits up to and including index. */
6002
0
            n += (int)hi + 1;
6003
0
        }
6004
    #elif defined(SP_ASM_LZCNT)
6005
        {
6006
            sp_int_digit lz;
6007
            /* Count number of leading zeros in highest non-zero digit. */
6008
            SP_ASM_LZCNT(d, lz);
6009
            /* Add non-leading zero bits count. */
6010
            n += SP_WORD_SIZE - (int)lz;
6011
        }
6012
    #else
6013
        /* Check if top word has more than half the bits set. */
6014
        if (d > SP_HALF_MAX) {
6015
            /* Set count to a full last word. */
6016
            n += SP_WORD_SIZE;
6017
            /* Don't count leading zero bits. */
6018
            while ((d & ((sp_int_digit)1 << (SP_WORD_SIZE - 1))) == 0) {
6019
                n--;
6020
                d <<= 1;
6021
            }
6022
        }
6023
        else {
6024
            /* Add to count until highest set bit is shifted out. */
6025
            while (d != 0) {
6026
                n++;
6027
                d >>= 1;
6028
            }
6029
        }
6030
    #endif
6031
0
    }
6032
6033
0
    return n;
6034
0
}
6035
6036
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
6037
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
6038
    (defined(HAVE_ECC) && defined(FP_ECC)) || \
6039
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
6040
6041
/* Number of entries in array of number of least significant zero bits. */
6042
#define SP_LNZ_CNT      16
6043
/* Number of bits the array checks. */
6044
0
#define SP_LNZ_BITS     4
6045
/* Mask to apply to check with array. */
6046
0
#define SP_LNZ_MASK     0xf
6047
/* Number of least significant zero bits in first SP_LNZ_CNT numbers. */
6048
static const int sp_lnz[SP_LNZ_CNT] = {
6049
   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
6050
};
6051
6052
/* Count the number of least significant zero bits.
6053
 *
6054
 * When a is NULL, result is 0.
6055
 *
6056
 * @param [in] a  SP integer to use.
6057
 *
6058
 * @return  Number of least significant zero bits.
6059
 */
6060
#if !defined(HAVE_ECC) || !defined(HAVE_COMP_KEY)
6061
static
6062
#endif /* !HAVE_ECC || HAVE_COMP_KEY */
6063
int sp_cnt_lsb(const sp_int* a)
6064
0
{
6065
0
    unsigned int bc = 0;
6066
6067
    /* Check for number with a value. */
6068
0
    if ((a != NULL) && (!sp_iszero(a))) {
6069
0
        unsigned int i;
6070
0
        unsigned int j;
6071
6072
        /* Count least significant words that are zero. */
6073
0
        for (i = 0; (i < a->used) && (a->dp[i] == 0); i++, bc += SP_WORD_SIZE) {
6074
0
        }
6075
6076
        /* Use 4-bit table to get count. */
6077
0
        for (j = 0; j < SP_WORD_SIZE; j += SP_LNZ_BITS) {
6078
            /* Get number of least significant 0 bits in nibble. */
6079
0
            int cnt = sp_lnz[(a->dp[i] >> j) & SP_LNZ_MASK];
6080
            /* Done if not all 4 bits are zero. */
6081
0
            if (cnt != 4) {
6082
                /* Add checked bits and count in last 4 bits checked. */
6083
0
                bc += j + (unsigned int)cnt;
6084
0
                break;
6085
0
            }
6086
0
        }
6087
0
    }
6088
6089
0
    return (int)bc;
6090
0
}
6091
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY &&
        * !WOLFSSL_RSA_PUBLIC_ONLY) || !NO_DH || (HAVE_ECC && FP_ECC) ||
        * (!NO_RSA && WOLFSSL_KEY_GEN) */
6092
6093
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_ASN_TEMPLATE) || \
6094
    (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_ASN))
6095
/* Determine if the most significant byte of the encoded multi-precision number
6096
 * has the top bit set.
6097
 *
6098
 * When a is NULL, result is 0.
6099
 *
6100
 * @param [in] a  SP integer.
6101
 *
6102
 * @return  1 when the top bit of top byte is set.
6103
 * @return  0 when the top bit of top byte is not set.
6104
 */
6105
int sp_leading_bit(const sp_int* a)
6106
0
{
6107
0
    int bit = 0;
6108
6109
    /* Check if we have a number and value to use. */
6110
0
    if ((a != NULL) && (a->used > 0)) {
6111
        /* Get top word. */
6112
0
        sp_int_digit d = a->dp[a->used - 1];
6113
6114
0
    #if SP_WORD_SIZE > 8
6115
        /* Remove bottom 8 bits until highest 8 bits left. */
6116
0
        while (d > (sp_int_digit)0xff) {
6117
0
            d >>= 8;
6118
0
        }
6119
0
    #endif
6120
        /* Get the highest bit of the 8-bit value. */
6121
0
        bit = (int)(d >> 7);
6122
0
    }
6123
6124
0
    return bit;
6125
0
}
6126
#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
6127
6128
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
6129
    defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || \
6130
    !defined(NO_RSA)
6131
/* Set one bit of a: a |= 1 << i
6132
 * The field 'used' is updated in a.
6133
 *
6134
 * @param [in, out] a  SP integer to set bit into.
6135
 * @param [in]      i  Index of bit to set.
6136
 *
6137
 * @return  MP_OKAY on success.
6138
 * @return  MP_VAL when a is NULL, index is negative or index is too large.
6139
 */
6140
int sp_set_bit(sp_int* a, int i)
6141
0
{
6142
0
    int err = MP_OKAY;
6143
    /* Get index of word to set. */
6144
0
    sp_size_t w = (sp_size_t)(i >> SP_WORD_SHIFT);
6145
6146
    /* Check for valid number and space for bit. */
6147
0
    if ((a == NULL) || (i < 0) || (w >= a->size)) {
6148
0
        err = MP_VAL;
6149
0
    }
6150
0
    if (err == MP_OKAY) {
6151
        /* Amount to shift up to set bit in word. */
6152
0
        unsigned int s = (unsigned int)(i & (SP_WORD_SIZE - 1));
6153
0
        unsigned int j;
6154
6155
        /* Set to zero all unused words up to and including word to have bit
6156
         * set.
6157
         */
6158
0
        for (j = a->used; j <= w; j++) {
6159
0
            a->dp[j] = 0;
6160
0
        }
6161
        /* Set bit in word. */
6162
0
        a->dp[w] |= (sp_int_digit)1 << s;
6163
        /* Update used if necessary */
6164
0
        if (a->used <= w) {
6165
0
            a->used = (sp_size_t)(w + 1U);
6166
0
        }
6167
0
    }
6168
6169
0
    return err;
6170
0
}
6171
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
6172
        * WOLFSSL_KEY_GEN || OPENSSL_EXTRA || !NO_RSA */
6173
6174
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6175
    defined(WOLFSSL_KEY_GEN) || !defined(NO_DH)
6176
/* Exponentiate 2 to the power of e: a = 2^e
6177
 * This is done by setting the 'e'th bit.
6178
 *
6179
 * @param [out] a  SP integer to hold result.
6180
 * @param [in]  e  Exponent.
6181
 *
6182
 * @return  MP_OKAY on success.
6183
 * @return  MP_VAL when a is NULL, e is negative or 2^e is too large.
6184
 */
6185
int sp_2expt(sp_int* a, int e)
6186
0
{
6187
0
    int err = MP_OKAY;
6188
6189
    /* Validate parameters. */
6190
0
    if ((a == NULL) || (e < 0)) {
6191
0
        err = MP_VAL;
6192
0
    }
6193
0
    if (err == MP_OKAY) {
6194
        /* Set number to zero and then set bit. */
6195
0
        _sp_zero(a);
6196
0
        err = sp_set_bit(a, e);
6197
0
    }
6198
6199
0
    return err;
6200
0
}
6201
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
6202
        * WOLFSSL_KEY_GEN || !NO_DH */
6203
6204
/**********************
6205
 * Digit/Long functions
6206
 **********************/
6207
6208
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || !defined(NO_DH) || \
6209
    defined(HAVE_ECC)
6210
/* Set the multi-precision number to be the value of the digit.
6211
 *
6212
 * @param [out] a  SP integer to become number.
6213
 * @param [in]  d  Digit to be set.
6214
 */
6215
static void _sp_set(sp_int* a, sp_int_digit d)
6216
0
{
6217
    /* Use sp_int_minimal to support allocated byte arrays as sp_ints. */
6218
0
    sp_int_minimal* am = (sp_int_minimal*)a;
6219
6220
0
    am->dp[0] = d;
6221
    /* d == 0 => used = 0, d > 0 => used = 1 */
6222
0
    am->used = (d > 0);
6223
#ifdef WOLFSSL_SP_INT_NEGATIVE
6224
    am->sign = MP_ZPOS;
6225
#endif
6226
0
}
6227
6228
/* Set the multi-precision number to be the value of the digit.
6229
 *
6230
 * @param [out] a  SP integer to become number.
6231
 * @param [in]  d  Digit to be set.
6232
 *
6233
 * @return  MP_OKAY on success.
6234
 * @return  MP_VAL when a is NULL.
6235
 */
6236
int sp_set(sp_int* a, sp_int_digit d)
6237
0
{
6238
0
    int err = MP_OKAY;
6239
6240
    /* Validate parameters. */
6241
0
    if (a == NULL) {
6242
0
        err = MP_VAL;
6243
0
    }
6244
0
    if (err == MP_OKAY) {
6245
0
        _sp_set(a, d);
6246
0
    }
6247
6248
0
    return err;
6249
0
}
6250
#endif
6251
6252
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || defined(OPENSSL_EXTRA)
6253
/* Set a number into the multi-precision number.
6254
 *
6255
 * Number may be larger than the size of a digit.
6256
 *
6257
 * @param [out] a  SP integer to set.
6258
 * @param [in]  n  Long value to set.
6259
 *
6260
 * @return  MP_OKAY on success.
6261
 * @return  MP_VAL when a is NULL.
6262
 */
6263
int sp_set_int(sp_int* a, unsigned long n)
6264
0
{
6265
0
    int err = MP_OKAY;
6266
6267
0
    if (a == NULL) {
6268
0
        err = MP_VAL;
6269
0
    }
6270
6271
0
    if (err == MP_OKAY) {
6272
    #if SP_WORD_SIZE < SP_ULONG_BITS
6273
        /* Assign if value first in one word. */
6274
        if (n <= (sp_int_digit)SP_DIGIT_MAX) {
6275
    #endif
6276
0
            a->dp[0] = (sp_int_digit)n;
6277
0
            a->used = (n != 0);
6278
    #if SP_WORD_SIZE < SP_ULONG_BITS
6279
        }
6280
        else {
6281
            unsigned int i;
6282
6283
            /* Assign value word by word. */
6284
            for (i = 0; (i < a->size) && (n > 0); i++,n >>= SP_WORD_SIZE) {
6285
                a->dp[i] = (sp_int_digit)n;
6286
            }
6287
            /* Update number of words used. */
6288
            a->used = i;
6289
            /* Check for overflow. */
6290
            if ((i == a->size) && (n != 0)) {
6291
                err = MP_VAL;
6292
            }
6293
        }
6294
    #endif
6295
    #ifdef WOLFSSL_SP_INT_NEGATIVE
6296
        a->sign = MP_ZPOS;
6297
    #endif
6298
0
    }
6299
6300
0
    return err;
6301
0
}
6302
#endif /* WOLFSSL_SP_MATH_ALL || !NO_RSA || OPENSSL_EXTRA */
6303
6304
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || !defined(NO_DH) || \
6305
    defined(HAVE_ECC)
6306
/* Compare a one digit number with a multi-precision number.
6307
 *
6308
 * When a is NULL, MP_LT is returned.
6309
 *
6310
 * @param [in] a  SP integer to compare.
6311
 * @param [in] d  Digit to compare with.
6312
 *
6313
 * @return  MP_GT when a is greater than d.
6314
 * @return  MP_LT when a is less than d.
6315
 * @return  MP_EQ when a is equal to d.
6316
 */
6317
int sp_cmp_d(const sp_int* a, sp_int_digit d)
6318
0
{
6319
0
    int ret = MP_EQ;
6320
6321
    /* No SP integer is always less - even when d is zero. */
6322
0
    if (a == NULL) {
6323
0
        ret = MP_LT;
6324
0
    }
6325
0
    else
6326
#ifdef WOLFSSL_SP_INT_NEGATIVE
6327
    /* Check sign first. */
6328
    if (a->sign == MP_NEG) {
6329
        ret = MP_LT;
6330
    }
6331
    else
6332
#endif
6333
0
    {
6334
        /* Check if SP integer as more than one word. */
6335
0
        if (a->used > 1) {
6336
0
            ret = MP_GT;
6337
0
        }
6338
        /* Special case for zero. */
6339
0
        else if (a->used == 0) {
6340
0
            if (d != 0) {
6341
0
                ret = MP_LT;
6342
0
            }
6343
            /* ret initialized to equal. */
6344
0
        }
6345
0
        else {
6346
            /* The single word in the SP integer can now be compared with d. */
6347
0
            if (a->dp[0] > d) {
6348
0
                ret = MP_GT;
6349
0
            }
6350
0
            else if (a->dp[0] < d) {
6351
0
                ret = MP_LT;
6352
0
            }
6353
            /* ret initialized to equal. */
6354
0
        }
6355
0
    }
6356
6357
0
    return ret;
6358
0
}
6359
#endif
6360
6361
#if defined(WOLFSSL_SP_ADD_D) || (defined(WOLFSSL_SP_INT_NEGATIVE) && \
6362
    defined(WOLFSSL_SP_SUB_D)) || defined(WOLFSSL_SP_READ_RADIX_10)
6363
/* Add a one digit number to the multi-precision number.
6364
 *
6365
 * @param [in]  a  SP integer to be added to.
6366
 * @param [in]  d  Digit to add.
6367
 * @param [out] r  SP integer to store result in.
6368
 *
6369
 * @return  MP_OKAY on success.
6370
 * @return  MP_VAL when result is too large for fixed size dp array.
6371
 */
6372
static int _sp_add_d(const sp_int* a, sp_int_digit d, sp_int* r)
6373
0
{
6374
0
    int err = MP_OKAY;
6375
6376
    /* Special case of zero means we want result to have a digit when not adding
6377
     * zero. */
6378
0
    if (a->used == 0) {
6379
0
        r->dp[0] = d;
6380
0
        r->used = (d > 0);
6381
0
    }
6382
0
    else {
6383
0
        unsigned int i = 0;
6384
0
        sp_int_digit a0 = a->dp[0];
6385
6386
        /* Set used of result - updated if overflow seen. */
6387
0
        r->used = a->used;
6388
6389
0
        r->dp[0] = a0 + d;
6390
        /* Check for carry. */
6391
0
        if (r->dp[0] < a0) {
6392
            /* Do carry through all words. */
6393
0
            for (++i; i < a->used; i++) {
6394
0
                r->dp[i] = a->dp[i] + 1;
6395
0
                if (r->dp[i] != 0) {
6396
0
                   break;
6397
0
                }
6398
0
            }
6399
            /* Add another word if required. */
6400
0
            if (i == a->used) {
6401
                /* Check result has enough space for another word. */
6402
0
                if (i < r->size) {
6403
0
                    r->used++;
6404
0
                    r->dp[i] = 1;
6405
0
                }
6406
0
                else {
6407
0
                    err = MP_VAL;
6408
0
                }
6409
0
            }
6410
0
        }
6411
        /* When result is not the same as input, copy rest of digits. */
6412
0
        if ((err == MP_OKAY) && (r != a)) {
6413
            /* Copy any words that didn't update with carry. */
6414
0
            for (++i; i < a->used; i++) {
6415
0
                r->dp[i] = a->dp[i];
6416
0
            }
6417
0
        }
6418
0
    }
6419
6420
0
    return err;
6421
0
}
6422
#endif /* WOLFSSL_SP_ADD_D || (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_SUB_D) ||
6423
        * defined(WOLFSSL_SP_READ_RADIX_10) */
6424
6425
#if (defined(WOLFSSL_SP_INT_NEGATIVE) && defined(WOLFSSL_SP_ADD_D)) || \
6426
    defined(WOLFSSL_SP_SUB_D) || defined(WOLFSSL_SP_INVMOD) || \
6427
    defined(WOLFSSL_SP_INVMOD_MONT_CT) || (defined(WOLFSSL_SP_PRIME_GEN) && \
6428
    !defined(WC_NO_RNG))
6429
/* Sub a one digit number from the multi-precision number.
6430
 *
6431
 * @param [in]  a  SP integer to be subtracted from.
6432
 * @param [in]  d  Digit to subtract.
6433
 * @param [out] r  SP integer to store result in.
6434
 */
6435
static void _sp_sub_d(const sp_int* a, sp_int_digit d, sp_int* r)
6436
0
{
6437
    /* Set result used to be same as input. Updated with clamp. */
6438
0
    r->used = a->used;
6439
    /* Only possible when not handling negatives. */
6440
0
    if (a->used == 0) {
6441
        /* Set result to zero as no negative support. */
6442
0
        r->dp[0] = 0;
6443
0
    }
6444
0
    else {
6445
0
        unsigned int i = 0;
6446
0
        sp_int_digit a0 = a->dp[0];
6447
6448
0
        r->dp[0] = a0 - d;
6449
        /* Check for borrow. */
6450
0
        if (r->dp[0] > a0) {
6451
            /* Do borrow through all words. */
6452
0
            for (++i; i < a->used; i++) {
6453
0
                r->dp[i] = a->dp[i] - 1;
6454
0
                if (r->dp[i] != SP_DIGIT_MAX) {
6455
0
                   break;
6456
0
                }
6457
0
            }
6458
0
        }
6459
        /* When result is not the same as input, copy rest of digits. */
6460
0
        if (r != a) {
6461
            /* Copy any words that didn't update with borrow. */
6462
0
            for (++i; i < a->used; i++) {
6463
0
                r->dp[i] = a->dp[i];
6464
0
            }
6465
0
        }
6466
        /* Remove leading zero words. */
6467
0
        sp_clamp(r);
6468
0
    }
6469
0
}
6470
#endif /* (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_ADD_D) || WOLFSSL_SP_SUB_D
6471
        * WOLFSSL_SP_INVMOD || WOLFSSL_SP_INVMOD_MONT_CT ||
6472
        * WOLFSSL_SP_PRIME_GEN */
6473
6474
#ifdef WOLFSSL_SP_ADD_D
6475
/* Add a one digit number to the multi-precision number.
6476
 *
6477
 * @param [in]  a  SP integer to be added to.
6478
 * @param [in]  d  Digit to add.
6479
 * @param [out] r  SP integer to store result in.
6480
 *
6481
 * @return  MP_OKAY on success.
6482
 * @return  MP_VAL when result is too large for fixed size dp array.
6483
 */
6484
int sp_add_d(const sp_int* a, sp_int_digit d, sp_int* r)
6485
0
{
6486
0
    int err = MP_OKAY;
6487
6488
    /* Check validity of parameters. */
6489
0
    if ((a == NULL) || (r == NULL)) {
6490
0
        err = MP_VAL;
6491
0
    }
6492
6493
0
#ifndef WOLFSSL_SP_INT_NEGATIVE
6494
    /* Check for space in result especially when carry adds a new word. */
6495
0
    if ((err == MP_OKAY) && (a->used + 1 > r->size)) {
6496
0
         err = MP_VAL;
6497
0
    }
6498
0
    if (err == MP_OKAY) {
6499
        /* Positive only so just use internal function. */
6500
0
        err = _sp_add_d(a, d, r);
6501
0
    }
6502
#else
6503
    /* Check for space in result especially when carry adds a new word. */
6504
    if ((err == MP_OKAY) && (a->sign == MP_ZPOS) && (a->used + 1 > r->size)) {
6505
         err = MP_VAL;
6506
    }
6507
    /* Check for space in result - no carry but borrow possible. */
6508
    if ((err == MP_OKAY) && (a->sign == MP_NEG) && (a->used > r->size)) {
6509
         err = MP_VAL;
6510
    }
6511
    if (err == MP_OKAY) {
6512
        if (a->sign == MP_ZPOS) {
6513
            /* Positive, so use internal function. */
6514
            r->sign = MP_ZPOS;
6515
            err = _sp_add_d(a, d, r);
6516
        }
6517
        else if ((a->used > 1) || (a->dp[0] > d)) {
6518
            /* Negative value bigger than digit so subtract digit. */
6519
            r->sign = MP_NEG;
6520
            _sp_sub_d(a, d, r);
6521
        }
6522
        else {
6523
            /* Negative value smaller or equal to digit. */
6524
            r->sign = MP_ZPOS;
6525
            /* Subtract negative value from digit. */
6526
            r->dp[0] = d - a->dp[0];
6527
            /* Result is a digit equal to or greater than zero. */
6528
            r->used = (r->dp[0] > 0);
6529
        }
6530
    }
6531
#endif
6532
6533
0
    return err;
6534
0
}
6535
#endif /* WOLFSSL_SP_ADD_D */
6536
6537
#ifdef WOLFSSL_SP_SUB_D
6538
/* Sub a one digit number from the multi-precision number.
6539
 *
6540
 * @param [in]  a  SP integer to be subtracted from.
6541
 * @param [in]  d  Digit to subtract.
6542
 * @param [out] r  SP integer to store result in.
6543
 *
6544
 * @return  MP_OKAY on success.
6545
 * @return  MP_VAL when a or r is NULL.
6546
 */
6547
int sp_sub_d(const sp_int* a, sp_int_digit d, sp_int* r)
6548
0
{
6549
0
    int err = MP_OKAY;
6550
6551
    /* Check validity of parameters. */
6552
0
    if ((a == NULL) || (r == NULL)) {
6553
0
        err = MP_VAL;
6554
0
    }
6555
0
#ifndef WOLFSSL_SP_INT_NEGATIVE
6556
    /* Check for space in result. */
6557
0
    if ((err == MP_OKAY) && (a->used > r->size)) {
6558
0
         err = MP_VAL;
6559
0
    }
6560
0
    if (err == MP_OKAY) {
6561
        /* Positive only so just use internal function. */
6562
0
        _sp_sub_d(a, d, r);
6563
0
    }
6564
#else
6565
    /* Check for space in result especially when borrow adds a new word. */
6566
    if ((err == MP_OKAY) && (a->sign == MP_NEG) && (a->used + 1 > r->size)) {
6567
         err = MP_VAL;
6568
    }
6569
    /* Check for space in result - no carry but borrow possible. */
6570
    if ((err == MP_OKAY) && (a->sign == MP_ZPOS) && (a->used > r->size)) {
6571
         err = MP_VAL;
6572
    }
6573
    if (err == MP_OKAY) {
6574
        if (a->sign == MP_NEG) {
6575
            /* Subtracting from negative use internal add. */
6576
            r->sign = MP_NEG;
6577
            err = _sp_add_d(a, d, r);
6578
        }
6579
        else if ((a->used > 1) || (a->dp[0] >= d)) {
6580
            /* Positive number greater than or equal to digit - subtract digit.
6581
             */
6582
            r->sign = MP_ZPOS;
6583
            _sp_sub_d(a, d, r);
6584
        }
6585
        else {
6586
            /* Positive value smaller than digit. */
6587
            r->sign = MP_NEG;
6588
            /* Subtract positive value from digit. */
6589
            r->dp[0] = d - a->dp[0];
6590
            /* Result is a digit equal to or greater than zero. */
6591
            r->used = 1;
6592
        }
6593
    }
6594
#endif
6595
6596
0
    return err;
6597
0
}
6598
#endif /* WOLFSSL_SP_SUB_D */
6599
6600
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6601
    defined(WOLFSSL_SP_SMALL) && (defined(WOLFSSL_SP_MATH_ALL) || \
6602
    !defined(NO_DH) || defined(HAVE_ECC) || \
6603
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
6604
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
6605
    (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA)) || \
6606
    defined(WOLFSSL_SP_MUL_D)
6607
/* Multiply a by digit d and put result into r shifting up o digits.
6608
 *   r = (a * d) << (o * SP_WORD_SIZE)
6609
 *
6610
 * @param [in]  a  SP integer to be multiplied.
6611
 * @param [in]  d  SP digit to multiply by.
6612
 * @param [out] r  SP integer result.
6613
 * @param [in]  o  Number of digits to move result up by.
6614
 * @return  MP_OKAY on success.
6615
 * @return  MP_VAL when result is too large for sp_int.
6616
 */
6617
static int _sp_mul_d(const sp_int* a, sp_int_digit d, sp_int* r, unsigned int o)
6618
0
{
6619
0
    int err = MP_OKAY;
6620
0
    unsigned int i;
6621
#ifndef SQR_MUL_ASM
6622
    sp_int_word t = 0;
6623
#else
6624
0
    sp_int_digit l = 0;
6625
0
    sp_int_digit h = 0;
6626
0
#endif
6627
6628
#ifdef WOLFSSL_SP_SMALL
6629
    /* Zero out offset words. */
6630
    for (i = 0; i < o; i++) {
6631
        r->dp[i] = 0;
6632
    }
6633
#else
6634
    /* Don't use the offset. Only when doing small code size div. */
6635
0
    (void)o;
6636
0
#endif
6637
6638
    /* Multiply each word of a by n. */
6639
0
    for (i = 0; i < a->used; i++, o++) {
6640
    #ifndef SQR_MUL_ASM
6641
        /* Add product to top word of previous result. */
6642
        t += (sp_int_word)a->dp[i] * d;
6643
        /* Store low word. */
6644
        r->dp[o] = (sp_int_digit)t;
6645
        /* Move top word down. */
6646
        t >>= SP_WORD_SIZE;
6647
    #else
6648
        /* Multiply and add into low and high from previous result.
6649
         * No overflow of possible with add. */
6650
0
        SP_ASM_MUL_ADD_NO(l, h, a->dp[i], d);
6651
        /* Store low word. */
6652
0
        r->dp[o] = l;
6653
        /* Move high word into low word and set high word to 0. */
6654
0
        l = h;
6655
0
        h = 0;
6656
0
    #endif
6657
0
    }
6658
6659
    /* Check whether new word to be appended to result. */
6660
#ifndef SQR_MUL_ASM
6661
    if (t > 0)
6662
#else
6663
0
    if (l > 0)
6664
0
#endif
6665
0
    {
6666
        /* Validate space available in result. */
6667
0
        if (o == r->size) {
6668
0
            err = MP_VAL;
6669
0
        }
6670
0
        else {
6671
            /* Store new top word. */
6672
        #ifndef SQR_MUL_ASM
6673
            r->dp[o++] = (sp_int_digit)t;
6674
        #else
6675
0
            r->dp[o++] = l;
6676
0
        #endif
6677
0
        }
6678
0
    }
6679
    /* Update number of words in result. */
6680
0
    r->used = (sp_size_t)o;
6681
    /* In case n is zero. */
6682
0
    sp_clamp(r);
6683
6684
0
    return err;
6685
0
}
6686
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
6687
        *  WOLFSSL_SP_SMALL || (WOLFSSL_KEY_GEN && !NO_RSA) */
6688
6689
#ifdef WOLFSSL_SP_MUL_D
6690
/* Multiply a by digit d and put result into r. r = a * d
6691
 *
6692
 * @param [in]  a  SP integer to multiply.
6693
 * @param [in]  d  Digit to multiply by.
6694
 * @param [out] r  SP integer to hold result.
6695
 *
6696
 * @return  MP_OKAY on success.
6697
 * @return  MP_VAL when a or r is NULL, or a has the maximum number of digits
6698
 *          used.
6699
 */
6700
int sp_mul_d(const sp_int* a, sp_int_digit d, sp_int* r)
6701
0
{
6702
0
    int err = MP_OKAY;
6703
6704
    /* Validate parameters. */
6705
0
    if ((a == NULL) || (r == NULL)) {
6706
0
        err = MP_VAL;
6707
0
    }
6708
    /* Check space for product result - _sp_mul_d checks when new word added. */
6709
0
    if ((err == MP_OKAY) && (a->used > r->size)) {
6710
0
        err = MP_VAL;
6711
0
    }
6712
6713
0
    if (err == MP_OKAY) {
6714
0
        err = _sp_mul_d(a, d, r, 0);
6715
    #ifdef WOLFSSL_SP_INT_NEGATIVE
6716
        /* Update sign. */
6717
        if (d == 0) {
6718
            r->sign = MP_ZPOS;
6719
        }
6720
        else {
6721
            r->sign = a->sign;
6722
        }
6723
    #endif
6724
0
    }
6725
6726
0
    return err;
6727
0
}
6728
#endif /* WOLFSSL_SP_MUL_D */
6729
6730
/* Predefine complicated rules of when to compile in sp_div_d and sp_mod_d. */
6731
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6732
    defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
6733
    defined(OPENSSL_EXTRA) || defined(WC_MP_TO_RADIX)
6734
#define WOLFSSL_SP_DIV_D
6735
#endif
6736
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6737
    !defined(NO_DH) || \
6738
    (defined(HAVE_ECC) && (defined(FP_ECC) || defined(HAVE_COMP_KEY))) || \
6739
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
6740
#define WOLFSSL_SP_MOD_D
6741
#endif
6742
6743
#if (defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
6744
     (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
6745
      !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
6746
    defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
6747
#ifndef SP_ASM_DIV_WORD
6748
/* Divide a two digit number by a digit number and return. (hi | lo) / d
6749
 *
6750
 * @param [in] hi  SP integer digit. High digit of the dividend.
6751
 * @param [in] lo  SP integer digit. Low digit of the dividend.
6752
 * @param [in] d   SP integer digit. Number to divide by.
6753
 * @return  The division result.
6754
 */
6755
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
6756
    sp_int_digit d)
6757
{
6758
#ifdef WOLFSSL_SP_DIV_WORD_HALF
6759
    sp_int_digit r;
6760
6761
    /* Trial division using half of the bits in d. */
6762
6763
    /* Check for shortcut when no high word set. */
6764
    if (hi == 0) {
6765
        r = lo / d;
6766
    }
6767
    else {
6768
        /* Half the bits of d. */
6769
        sp_int_digit divh = d >> SP_HALF_SIZE;
6770
        /* Number to divide in one value. */
6771
        sp_int_word w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
6772
        sp_int_word trial;
6773
        sp_int_digit r2;
6774
6775
        /* Calculation for top SP_WORD_SIZE / 2 bits of dividend. */
6776
        /* Divide high word by top half of divisor. */
6777
        r = hi / divh;
6778
        /* When result too big then assume only max value. */
6779
        if (r > SP_HALF_MAX) {
6780
            r = SP_HALF_MAX;
6781
        }
6782
        /* Shift up result for trial division calculation. */
6783
        r <<= SP_HALF_SIZE;
6784
        /* Calculate trial value. */
6785
        trial = r * (sp_int_word)d;
6786
        /* Decrease r while trial is too big. */
6787
        while (trial > w) {
6788
            r -= (sp_int_digit)1 << SP_HALF_SIZE;
6789
            trial -= (sp_int_word)d << SP_HALF_SIZE;
6790
        }
6791
        /* Subtract trial. */
6792
        w -= trial;
6793
6794
        /* Calculation for remaining second SP_WORD_SIZE / 2 bits. */
6795
        /* Divide top SP_WORD_SIZE of remainder by top half of divisor. */
6796
        r2 = ((sp_int_digit)(w >> SP_HALF_SIZE)) / divh;
6797
        /* Calculate trial value. */
6798
        trial = r2 * (sp_int_word)d;
6799
        /* Decrease r while trial is too big. */
6800
        while (trial > w) {
6801
            r2--;
6802
            trial -= d;
6803
        }
6804
        /* Subtract trial. */
6805
        w -= trial;
6806
        /* Update result. */
6807
        r += r2;
6808
6809
        /* Calculation for remaining bottom SP_WORD_SIZE bits. */
6810
        r2 = ((sp_int_digit)w) / d;
6811
        /* Update result. */
6812
        r += r2;
6813
    }
6814
6815
    return r;
6816
#else
6817
    sp_int_word w;
6818
    sp_int_digit r;
6819
6820
    /* Use built-in divide. */
6821
    w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
6822
    w /= d;
6823
    r = (sp_int_digit)w;
6824
6825
    return r;
6826
#endif /* WOLFSSL_SP_DIV_WORD_HALF */
6827
}
6828
#endif /* !SP_ASM_DIV_WORD */
6829
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
6830
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
6831
6832
#if (defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)) && \
6833
    !defined(WOLFSSL_SP_SMALL)
6834
6835
#if SP_WORD_SIZE == 64
6836
    /* 2^64 / 3 */
6837
    #define SP_DIV_3_CONST      0x5555555555555555L
6838
    /* 2^64 / 10 */
6839
0
    #define SP_DIV_10_CONST     0x1999999999999999L
6840
#elif SP_WORD_SIZE == 32
6841
    /* 2^32 / 3 */
6842
    #define SP_DIV_3_CONST      0x55555555
6843
    /* 2^32 / 10 */
6844
    #define SP_DIV_10_CONST     0x19999999
6845
#elif SP_WORD_SIZE == 16
6846
    /* 2^16 / 3 */
6847
    #define SP_DIV_3_CONST      0x5555
6848
    /* 2^16 / 10 */
6849
    #define SP_DIV_10_CONST     0x1999
6850
#elif SP_WORD_SIZE == 8
6851
    /* 2^8 / 3 */
6852
    #define SP_DIV_3_CONST      0x55
6853
    /* 2^8 / 10 */
6854
    #define SP_DIV_10_CONST     0x19
6855
#endif
6856
6857
#if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE < 64)
6858
/* Divide by 3: r = a / 3 and rem = a % 3
6859
 *
6860
 * Used in checking prime: (a % 3) == 0?.
6861
 *
6862
 * @param [in]  a    SP integer to be divided.
6863
 * @param [out] r    SP integer that is the quotient. May be NULL.
6864
 * @param [out] rem  SP integer that is the remainder. May be NULL.
6865
 */
6866
static void _sp_div_3(const sp_int* a, sp_int* r, sp_int_digit* rem)
6867
{
6868
#ifndef SQR_MUL_ASM
6869
    sp_int_word t;
6870
    sp_int_digit tt;
6871
#else
6872
    sp_int_digit l = 0;
6873
    sp_int_digit tt = 0;
6874
    sp_int_digit t = SP_DIV_3_CONST;
6875
    sp_int_digit lm = 0;
6876
    sp_int_digit hm = 0;
6877
#endif
6878
    sp_int_digit tr = 0;
6879
    /* Quotient fixup. */
6880
    static const unsigned char sp_r6[6] = { 0, 0, 0, 1, 1, 1 };
6881
    /* Remainder fixup. */
6882
    static const unsigned char sp_rem6[6] = { 0, 1, 2, 0, 1, 2 };
6883
6884
    /* Check whether only mod value needed. */
6885
    if (r == NULL) {
6886
        unsigned int i;
6887
6888
        /*    2^2 mod 3 = 4 mod 3 = 1.
6889
         * => 2^(2*n) mod 3 = (2^2 mod 3)^n mod 3 = 1^n mod 3 = 1
6890
         * => (2^(2*n) * x) mod 3 = (2^(2*n) mod 3) * (x mod 3) = x mod 3
6891
         *
6892
         * Calculate mod 3 on sum of digits as SP_WORD_SIZE is a multiple of 2.
6893
         */
6894
    #ifndef SQR_MUL_ASM
6895
        t = 0;
6896
        /* Sum the digits. */
6897
        for (i = 0; i < a->used; i++) {
6898
            t += a->dp[i];
6899
        }
6900
        /* Sum digits of sum. */
6901
        t = (t >> SP_WORD_SIZE) + (t & SP_MASK);
6902
        /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 3. */
6903
        tt = (sp_int_digit)((t * SP_DIV_3_CONST) >> SP_WORD_SIZE);
6904
        /* Subtract trial division. */
6905
        tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
6906
    #else
6907
        /* Sum the digits. */
6908
        for (i = 0; i < a->used; i++) {
6909
            SP_ASM_ADDC_REG(l, tr, a->dp[i]);
6910
        }
6911
        /* Sum digits of sum - can get carry. */
6912
        SP_ASM_ADDC_REG(l, tt, tr);
6913
        /* Multiply digit by (2^SP_WORD_SIZE) / 3. */
6914
        SP_ASM_MUL(lm, hm, l, t);
6915
        /* Add remainder multiplied by (2^SP_WORD_SIZE) / 3 to top digit. */
6916
        hm += tt * SP_DIV_3_CONST;
6917
        /* Subtract trial division from digit. */
6918
        tr = l - (hm * 3);
6919
    #endif
6920
        /* tr is 0..5 but need 0..2 */
6921
        /* Fix up remainder. */
6922
        tr = sp_rem6[tr];
6923
        *rem = tr;
6924
    }
6925
    /* At least result needed - remainder is calculated anyway. */
6926
    else {
6927
        int i;
6928
6929
        /* Divide starting at most significant word down to least. */
6930
        for (i = (int)a->used - 1; i >= 0; i--) {
6931
    #ifndef SQR_MUL_ASM
6932
            /* Combine remainder from last operation with this word. */
6933
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
6934
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 3. */
6935
            tt = (sp_int_digit)((t * SP_DIV_3_CONST) >> SP_WORD_SIZE);
6936
            /* Subtract trial division. */
6937
            tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
6938
    #else
6939
            /* Multiply digit by (2^SP_WORD_SIZE) / 3. */
6940
            SP_ASM_MUL(l, tt, a->dp[i], t);
6941
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 3 to top digit. */
6942
            tt += tr * SP_DIV_3_CONST;
6943
            /* Subtract trial division from digit. */
6944
            tr = a->dp[i] - (tt * 3);
6945
    #endif
6946
            /* tr is 0..5 but need 0..2 */
6947
            /* Fix up result. */
6948
            tt += sp_r6[tr];
6949
            /* Fix up remainder. */
6950
            tr = sp_rem6[tr];
6951
            /* Store result of digit divided by 3. */
6952
            r->dp[i] = tt;
6953
        }
6954
6955
        /* Set the used amount to maximal amount. */
6956
        r->used = a->used;
6957
        /* Remove leading zeros. */
6958
        sp_clamp(r);
6959
        /* Return remainder if required. */
6960
        if (rem != NULL) {
6961
            *rem = tr;
6962
        }
6963
    }
6964
}
6965
#endif /* !WOLFSSL_SP_SMALL && (SP_WORD_SIZE < 64) */
6966
6967
/* Divide by 10: r = a / 10 and rem = a % 10
6968
 *
6969
 * Used when writing with a radix of 10 - decimal number.
6970
 *
6971
 * @param [in]  a    SP integer to be divided.
6972
 * @param [out] r    SP integer that is the quotient. May be NULL.
6973
 * @param [out] rem  SP integer that is the remainder. May be NULL.
6974
 */
6975
static void _sp_div_10(const sp_int* a, sp_int* r, sp_int_digit* rem)
6976
0
{
6977
0
    int i;
6978
#ifndef SQR_MUL_ASM
6979
    sp_int_word t;
6980
    sp_int_digit tt;
6981
#else
6982
0
    sp_int_digit l = 0;
6983
0
    sp_int_digit tt = 0;
6984
0
    sp_int_digit t = SP_DIV_10_CONST;
6985
0
#endif
6986
0
    sp_int_digit tr = 0;
6987
6988
    /* Check whether only mod value needed. */
6989
0
    if (r == NULL) {
6990
        /* Divide starting at most significant word down to least. */
6991
0
        for (i = (int)a->used - 1; i >= 0; i--) {
6992
    #ifndef SQR_MUL_ASM
6993
            /* Combine remainder from last operation with this word. */
6994
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
6995
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 10. */
6996
            tt = (sp_int_digit)((t * SP_DIV_10_CONST) >> SP_WORD_SIZE);
6997
            /* Subtract trial division. */
6998
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
6999
    #else
7000
            /* Multiply digit by (2^SP_WORD_SIZE) / 10. */
7001
0
            SP_ASM_MUL(l, tt, a->dp[i], t);
7002
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 10 to top digit.
7003
             */
7004
0
            tt += tr * SP_DIV_10_CONST;
7005
            /* Subtract trial division from digit. */
7006
0
            tr = a->dp[i] - (tt * 10);
7007
0
    #endif
7008
            /* tr is 0..99 but need 0..9 */
7009
            /* Fix up remainder. */
7010
0
            tr = tr % 10;
7011
0
        }
7012
0
        *rem = tr;
7013
0
    }
7014
    /* At least result needed - remainder is calculated anyway. */
7015
0
    else {
7016
        /* Divide starting at most significant word down to least. */
7017
0
        for (i = (int)a->used - 1; i >= 0; i--) {
7018
    #ifndef SQR_MUL_ASM
7019
            /* Combine remainder from last operation with this word. */
7020
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
7021
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 10. */
7022
            tt = (sp_int_digit)((t * SP_DIV_10_CONST) >> SP_WORD_SIZE);
7023
            /* Subtract trial division. */
7024
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
7025
    #else
7026
            /* Multiply digit by (2^SP_WORD_SIZE) / 10. */
7027
0
            SP_ASM_MUL(l, tt, a->dp[i], t);
7028
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 10 to top digit.
7029
             */
7030
0
            tt += tr * SP_DIV_10_CONST;
7031
            /* Subtract trial division from digit. */
7032
0
            tr = a->dp[i] - (tt * 10);
7033
0
    #endif
7034
            /* tr is 0..99 but need 0..9 */
7035
            /* Fix up result. */
7036
0
            tt += tr / 10;
7037
            /* Fix up remainder. */
7038
0
            tr %= 10;
7039
            /* Store result of digit divided by 10. */
7040
0
            r->dp[i] = tt;
7041
0
        }
7042
7043
        /* Set the used amount to maximal amount. */
7044
0
        r->used = a->used;
7045
        /* Remove leading zeros. */
7046
0
        sp_clamp(r);
7047
        /* Return remainder if required. */
7048
0
        if (rem != NULL) {
7049
0
            *rem = tr;
7050
0
        }
7051
0
    }
7052
0
}
7053
#endif /* (WOLFSSL_SP_DIV_D || WOLFSSL_SP_MOD_D) && !WOLFSSL_SP_SMALL */
7054
7055
#if defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
7056
/* Divide by small number: r = a / d and rem = a % d
7057
 *
7058
 * @param [in]  a    SP integer to be divided.
7059
 * @param [in]  d    Digit to divide by.
7060
 * @param [out] r    SP integer that is the quotient. May be NULL.
7061
 * @param [out] rem  SP integer that is the remainder. May be NULL.
7062
 */
7063
static void _sp_div_small(const sp_int* a, sp_int_digit d, sp_int* r,
7064
    sp_int_digit* rem)
7065
0
{
7066
0
    int i;
7067
#ifndef SQR_MUL_ASM
7068
    sp_int_word t;
7069
    sp_int_digit tt;
7070
#else
7071
0
    sp_int_digit l = 0;
7072
0
    sp_int_digit tt = 0;
7073
0
#endif
7074
0
    sp_int_digit tr = 0;
7075
0
    sp_int_digit m = SP_DIGIT_MAX / d;
7076
7077
0
#ifndef WOLFSSL_SP_SMALL
7078
    /* Check whether only mod value needed. */
7079
0
    if (r == NULL) {
7080
        /* Divide starting at most significant word down to least. */
7081
0
        for (i = (int)a->used - 1; i >= 0; i--) {
7082
        #ifndef SQR_MUL_ASM
7083
            /* Combine remainder from last operation with this word. */
7084
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
7085
            /* Get top digit after multiplying. */
7086
            tt = (sp_int_digit)((t * m) >> SP_WORD_SIZE);
7087
            /* Subtract trial division. */
7088
            tr = (sp_int_digit)t - (sp_int_digit)(tt * d);
7089
        #else
7090
            /* Multiply digit. */
7091
0
            SP_ASM_MUL(l, tt, a->dp[i], m);
7092
            /* Add multiplied remainder to top digit. */
7093
0
            tt += tr * m;
7094
            /* Subtract trial division from digit. */
7095
0
            tr = a->dp[i] - (tt * d);
7096
0
        #endif
7097
            /* tr < d * d */
7098
            /* Fix up remainder. */
7099
0
            tr = tr % d;
7100
0
        }
7101
0
        *rem = tr;
7102
0
    }
7103
    /* At least result needed - remainder is calculated anyway. */
7104
0
    else
7105
0
#endif /* !WOLFSSL_SP_SMALL */
7106
0
    {
7107
        /* Divide starting at most significant word down to least. */
7108
0
        for (i = (int)a->used - 1; i >= 0; i--) {
7109
        #ifndef SQR_MUL_ASM
7110
            /* Combine remainder from last operation with this word. */
7111
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
7112
            /* Get top digit after multiplying. */
7113
            tt = (sp_int_digit)((t * m) >> SP_WORD_SIZE);
7114
            /* Subtract trial division. */
7115
            tr = (sp_int_digit)t - (sp_int_digit)(tt * d);
7116
        #else
7117
            /* Multiply digit. */
7118
0
            SP_ASM_MUL(l, tt, a->dp[i], m);
7119
            /* Add multiplied remainder to top digit. */
7120
0
            tt += tr * m;
7121
            /* Subtract trial division from digit. */
7122
0
            tr = a->dp[i] - (tt * d);
7123
0
        #endif
7124
            /* tr < d * d */
7125
            /* Fix up result. */
7126
0
            tt += tr / d;
7127
            /* Fix up remainder. */
7128
0
            tr %= d;
7129
            /* Store result of dividing the digit. */
7130
        #ifdef WOLFSSL_SP_SMALL
7131
            if (r != NULL)
7132
        #endif
7133
0
            {
7134
0
                r->dp[i] = tt;
7135
0
            }
7136
0
        }
7137
7138
    #ifdef WOLFSSL_SP_SMALL
7139
        if (r != NULL)
7140
    #endif
7141
0
        {
7142
            /* Set the used amount to maximal amount. */
7143
0
            r->used = a->used;
7144
            /* Remove leading zeros. */
7145
0
            sp_clamp(r);
7146
0
        }
7147
        /* Return remainder if required. */
7148
0
        if (rem != NULL) {
7149
0
            *rem = tr;
7150
0
        }
7151
0
    }
7152
0
}
7153
#endif
7154
7155
#ifdef WOLFSSL_SP_DIV_D
7156
/* Divide a multi-precision number by a digit size number and calculate
7157
 * remainder.
7158
 *   r = a / d; rem = a % d
7159
 *
7160
 * Use trial division algorithm.
7161
 *
7162
 * @param [in]  a    SP integer to be divided.
7163
 * @param [in]  d    Digit to divide by.
7164
 * @param [out] r    SP integer that is the quotient. May be NULL.
7165
 * @param [out] rem  Digit that is the remainder. May be NULL.
7166
 */
7167
static void _sp_div_d(const sp_int* a, sp_int_digit d, sp_int* r,
7168
    sp_int_digit* rem)
7169
0
{
7170
0
    int i;
7171
#ifndef SQR_MUL_ASM
7172
    sp_int_word w = 0;
7173
#else
7174
0
    sp_int_digit l;
7175
0
    sp_int_digit h = 0;
7176
0
#endif
7177
0
    sp_int_digit t;
7178
7179
    /* Divide starting at most significant word down to least. */
7180
0
    for (i = (int)a->used - 1; i >= 0; i--) {
7181
    #ifndef SQR_MUL_ASM
7182
        /* Combine remainder from last operation with this word and divide. */
7183
        t = sp_div_word((sp_int_digit)w, a->dp[i], d);
7184
        /* Combine remainder from last operation with this word. */
7185
        w = (w << SP_WORD_SIZE) | a->dp[i];
7186
        /* Subtract to get modulo result. */
7187
        w -= (sp_int_word)t * d;
7188
    #else
7189
        /* Get current word. */
7190
0
        l = a->dp[i];
7191
        /* Combine remainder from last operation with this word and divide. */
7192
0
        t = sp_div_word(h, l, d);
7193
        /* Subtract to get modulo result. */
7194
0
        h = l - t * d;
7195
0
    #endif
7196
        /* Store result of dividing the digit. */
7197
0
        if (r != NULL) {
7198
0
            r->dp[i] = t;
7199
0
        }
7200
0
    }
7201
0
    if (r != NULL) {
7202
        /* Set the used amount to maximal amount. */
7203
0
        r->used = a->used;
7204
        /* Remove leading zeros. */
7205
0
        sp_clamp(r);
7206
0
    }
7207
7208
    /* Return remainder if required. */
7209
0
    if (rem != NULL) {
7210
    #ifndef SQR_MUL_ASM
7211
        *rem = (sp_int_digit)w;
7212
    #else
7213
0
        *rem = h;
7214
0
    #endif
7215
0
    }
7216
0
}
7217
7218
/* Divide a multi-precision number by a digit size number and calculate
7219
 * remainder.
7220
 *   r = a / d; rem = a % d
7221
 *
7222
 * @param [in]  a    SP integer to be divided.
7223
 * @param [in]  d    Digit to divide by.
7224
 * @param [out] r    SP integer that is the quotient. May be NULL.
7225
 * @param [out] rem  Digit that is the remainder. May be NULL.
7226
 *
7227
 * @return  MP_OKAY on success.
7228
 * @return  MP_VAL when a is NULL or d is 0.
7229
 */
7230
int sp_div_d(const sp_int* a, sp_int_digit d, sp_int* r, sp_int_digit* rem)
7231
0
{
7232
0
    int err = MP_OKAY;
7233
7234
    /* Validate parameters. */
7235
0
    if ((a == NULL) || (d == 0)) {
7236
0
        err = MP_VAL;
7237
0
    }
7238
    /* Check space for maximal sized result. */
7239
0
    if ((err == MP_OKAY) && (r != NULL) && (a->used > r->size)) {
7240
0
        err = MP_VAL;
7241
0
    }
7242
7243
0
    if (err == MP_OKAY) {
7244
0
#if !defined(WOLFSSL_SP_SMALL)
7245
    #if SP_WORD_SIZE < 64
7246
        if (d == 3) {
7247
            /* Fast implementation for divisor of 3. */
7248
            _sp_div_3(a, r, rem);
7249
        }
7250
        else
7251
    #endif
7252
0
        if (d == 10) {
7253
            /* Fast implementation for divisor of 10 - sp_todecimal(). */
7254
0
            _sp_div_10(a, r, rem);
7255
0
        }
7256
0
        else
7257
0
#endif
7258
0
        if (d <= SP_HALF_MAX) {
7259
            /* For small divisors. */
7260
0
            _sp_div_small(a, d, r, rem);
7261
0
        }
7262
0
        else
7263
0
        {
7264
0
            _sp_div_d(a, d, r, rem);
7265
0
        }
7266
7267
    #ifdef WOLFSSL_SP_INT_NEGATIVE
7268
        if (r != NULL) {
7269
            r->sign = a->sign;
7270
        }
7271
    #endif
7272
0
    }
7273
7274
0
    return err;
7275
0
}
7276
#endif /* WOLFSSL_SP_DIV_D */
7277
7278
#ifdef WOLFSSL_SP_MOD_D
7279
/* Calculate a modulo the digit d into r: r = a mod d
7280
 *
7281
 * @param [in]  a  SP integer to reduce.
7282
 * @param [in]  d  Digit that is the modulus.
7283
 * @param [out] r  Digit that is the result.
7284
 */
7285
static void _sp_mod_d(const sp_int* a, const sp_int_digit d, sp_int_digit* r)
7286
0
{
7287
0
    int i;
7288
#ifndef SQR_MUL_ASM
7289
    sp_int_word w = 0;
7290
#else
7291
0
    sp_int_digit h = 0;
7292
0
#endif
7293
7294
    /* Divide starting at most significant word down to least. */
7295
0
    for (i = (int)a->used - 1; i >= 0; i--) {
7296
    #ifndef SQR_MUL_ASM
7297
        /* Combine remainder from last operation with this word and divide. */
7298
        sp_int_digit t = sp_div_word((sp_int_digit)w, a->dp[i], d);
7299
        /* Combine remainder from last operation with this word. */
7300
        w = (w << SP_WORD_SIZE) | a->dp[i];
7301
        /* Subtract to get modulo result. */
7302
        w -= (sp_int_word)t * d;
7303
    #else
7304
        /* Combine remainder from last operation with this word and divide. */
7305
0
        sp_int_digit t = sp_div_word(h, a->dp[i], d);
7306
        /* Subtract to get modulo result. */
7307
0
        h = a->dp[i] - t * d;
7308
0
    #endif
7309
0
    }
7310
7311
    /* Return remainder. */
7312
#ifndef SQR_MUL_ASM
7313
    *r = (sp_int_digit)w;
7314
#else
7315
0
    *r = h;
7316
0
#endif
7317
0
}
7318
7319
/* Calculate a modulo the digit d into r: r = a mod d
7320
 *
7321
 * @param [in]  a  SP integer to reduce.
7322
 * @param [in]  d  Digit that is the modulus.
7323
 * @param [out] r  Digit that is the result.
7324
 *
7325
 * @return  MP_OKAY on success.
7326
 * @return  MP_VAL when a is NULL or d is 0.
7327
 */
7328
#if !defined(WOLFSSL_SP_MATH_ALL) && (!defined(HAVE_ECC) || \
7329
    !defined(HAVE_COMP_KEY)) && !defined(OPENSSL_EXTRA)
7330
static
7331
#endif /* !WOLFSSL_SP_MATH_ALL && (!HAVE_ECC || !HAVE_COMP_KEY) */
7332
int sp_mod_d(const sp_int* a, sp_int_digit d, sp_int_digit* r)
7333
0
{
7334
0
    int err = MP_OKAY;
7335
7336
    /* Validate parameters. */
7337
0
    if ((a == NULL) || (r == NULL) || (d == 0)) {
7338
0
        err = MP_VAL;
7339
0
    }
7340
7341
#if 0
7342
    sp_print(a, "a");
7343
    sp_print_digit(d, "m");
7344
#endif
7345
7346
0
    if (err == MP_OKAY) {
7347
        /* Check whether d is a power of 2. */
7348
0
        if ((d & (d - 1)) == 0) {
7349
0
            if (a->used == 0) {
7350
0
                *r = 0;
7351
0
            }
7352
0
            else {
7353
0
                *r = a->dp[0] & (d - 1);
7354
0
            }
7355
0
        }
7356
0
#if !defined(WOLFSSL_SP_SMALL)
7357
    #if SP_WORD_SIZE < 64
7358
        else if (d == 3) {
7359
            /* Fast implementation for divisor of 3. */
7360
            _sp_div_3(a, NULL, r);
7361
        }
7362
    #endif
7363
0
        else if (d == 10) {
7364
            /* Fast implementation for divisor of 10. */
7365
0
            _sp_div_10(a, NULL, r);
7366
0
        }
7367
0
#endif
7368
0
        else if (d <= SP_HALF_MAX) {
7369
            /* For small divisors. */
7370
0
            _sp_div_small(a, d, NULL, r);
7371
0
        }
7372
0
        else {
7373
0
            _sp_mod_d(a, d, r);
7374
0
        }
7375
7376
    #ifdef WOLFSSL_SP_INT_NEGATIVE
7377
        if ((a->sign == MP_NEG) && (*r != 0)) {
7378
            *r = d - *r;
7379
        }
7380
    #endif
7381
0
    }
7382
7383
#if 0
7384
    sp_print_digit(*r, "rmod");
7385
#endif
7386
7387
0
    return err;
7388
0
}
7389
#endif /* WOLFSSL_SP_MOD_D */
7390
7391
#if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
7392
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
7393
     !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_SP_INVMOD)
7394
/* Divides a by 2 and stores in r: r = a >> 1
7395
 *
7396
 * @param [in]  a  SP integer to divide.
7397
 * @param [out] r  SP integer to hold result.
7398
 */
7399
static void _sp_div_2(const sp_int* a, sp_int* r)
7400
0
{
7401
0
    int i;
7402
7403
    /* Shift down each word by 1 and include bottom bit of next at top. */
7404
0
    for (i = 0; i < (int)a->used - 1; i++) {
7405
0
        r->dp[i]  = a->dp[i] >> 1;
7406
0
        r->dp[i] |= a->dp[i+1] << (SP_WORD_SIZE - 1);
7407
0
    }
7408
    /* Last word only needs to be shifted down. */
7409
0
    r->dp[i] = a->dp[i] >> 1;
7410
    /* Set used to be all words seen. */
7411
0
    r->used = (sp_size_t)(i + 1);
7412
    /* Remove leading zeros. */
7413
0
    sp_clamp(r);
7414
#ifdef WOLFSSL_SP_INT_NEGATIVE
7415
    /* Same sign in result. */
7416
    r->sign = a->sign;
7417
#endif
7418
0
}
7419
7420
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
7421
/* Divides a by 2 and stores in r: r = a >> 1
7422
 *
7423
 * @param [in]  a  SP integer to divide.
7424
 * @param [out] r  SP integer to hold result.
7425
 *
7426
 * @return  MP_OKAY on success.
7427
 * @return  MP_VAL when a or r is NULL.
7428
 */
7429
int sp_div_2(const sp_int* a, sp_int* r)
7430
0
{
7431
0
    int err = MP_OKAY;
7432
7433
    /* Only when a public API. */
7434
0
    if ((a == NULL) || (r == NULL)) {
7435
0
        err = MP_VAL;
7436
0
    }
7437
    /* Ensure maximal size is supported by result. */
7438
0
    if ((err == MP_OKAY) && (a->used > r->size)) {
7439
0
        err = MP_VAL;
7440
0
    }
7441
7442
0
    if (err == MP_OKAY) {
7443
0
        _sp_div_2(a, r);
7444
0
    }
7445
7446
0
    return err;
7447
0
}
7448
#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
7449
#endif /* HAVE_ECC || !NO_DSA || OPENSSL_EXTRA ||
7450
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
7451
7452
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
7453
/* Divides a by 2 mod m and stores in r: r = (a / 2) mod m
7454
 *
7455
 * r = a / 2 (mod m) - constant time (a < m and positive)
7456
 *
7457
 * @param [in]  a  SP integer to divide.
7458
 * @param [in]  m  SP integer that is the modulus.
7459
 * @param [out] r  SP integer to hold result.
7460
 *
7461
 * @return  MP_OKAY on success.
7462
 * @return  MP_VAL when a, m or r is NULL.
7463
 */
7464
int sp_div_2_mod_ct(const sp_int* a, const sp_int* m, sp_int* r)
7465
0
{
7466
0
    int err = MP_OKAY;
7467
7468
    /* Validate parameters. */
7469
0
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
7470
0
        err = MP_VAL;
7471
0
    }
7472
    /* Check result has enough space for a + m. */
7473
0
    if ((err == MP_OKAY) && (m->used + 1 > r->size)) {
7474
0
        err = MP_VAL;
7475
0
    }
7476
7477
0
    if (err == MP_OKAY) {
7478
    #ifndef SQR_MUL_ASM
7479
        sp_int_word  w = 0;
7480
    #else
7481
0
        sp_int_digit l = 0;
7482
0
        sp_int_digit h;
7483
0
        sp_int_digit t;
7484
0
    #endif
7485
        /* Mask to apply to modulus. */
7486
0
        volatile sp_int_digit mask = (sp_int_digit)0 - (a->dp[0] & 1);
7487
0
        sp_size_t i;
7488
7489
    #if 0
7490
        sp_print(a, "a");
7491
        sp_print(m, "m");
7492
    #endif
7493
7494
        /* Add a to m, if a is odd, into r in constant time. */
7495
0
        for (i = 0; i < m->used; i++) {
7496
            /* Mask to apply to a - set when used value at index. */
7497
0
            volatile sp_int_digit mask_a = (sp_int_digit)0 - (i < a->used);
7498
7499
        #ifndef SQR_MUL_ASM
7500
            /* Conditionally add modulus. */
7501
            w         += m->dp[i] & mask;
7502
            /* Conditionally add a. */
7503
            w         += a->dp[i] & mask_a;
7504
            /* Store low digit in result. */
7505
            r->dp[i]   = (sp_int_digit)w;
7506
            /* Move high digit down. */
7507
            w        >>= DIGIT_BIT;
7508
        #else
7509
            /* No high digit. */
7510
0
            h        = 0;
7511
            /* Conditionally use modulus. */
7512
0
            t        = m->dp[i] & mask;
7513
            /* Add with carry modulus. */
7514
0
            SP_ASM_ADDC_REG(l, h, t);
7515
            /* Conditionally use a. */
7516
0
            t        = a->dp[i] & mask_a;
7517
            /* Add with carry a. */
7518
0
            SP_ASM_ADDC_REG(l, h, t);
7519
            /* Store low digit in result. */
7520
0
            r->dp[i] = l;
7521
            /* Move high digit down. */
7522
0
            l        = h;
7523
0
        #endif
7524
0
        }
7525
        /* Store carry. */
7526
    #ifndef SQR_MUL_ASM
7527
        r->dp[i] = (sp_int_digit)w;
7528
    #else
7529
0
        r->dp[i] = l;
7530
0
    #endif
7531
        /* Used includes carry - set or not. */
7532
0
        r->used = (sp_size_t)(i + 1);
7533
    #ifdef WOLFSSL_SP_INT_NEGATIVE
7534
        r->sign = MP_ZPOS;
7535
    #endif
7536
        /* Divide conditional sum by 2. */
7537
0
        _sp_div_2(r, r);
7538
7539
    #if 0
7540
        sp_print(r, "rd2");
7541
    #endif
7542
0
    }
7543
7544
0
    return err;
7545
0
}
7546
#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
7547
7548
/************************
7549
 * Add/Subtract Functions
7550
 ************************/
7551
7552
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
7553
/* Add offset b to a into r: r = a + (b << (o * SP_WORD_SIZE))
7554
 *
7555
 * @param [in]  a  SP integer to add to.
7556
 * @param [in]  b  SP integer to add.
7557
 * @param [out] r  SP integer to store result in.
7558
 * @param [in]  o  Number of digits to offset b.
7559
 */
7560
static void _sp_add_off(const sp_int* a, const sp_int* b, sp_int* r, int o)
7561
0
{
7562
0
    sp_size_t i = 0;
7563
#ifndef SQR_MUL_ASM
7564
    sp_int_word t = 0;
7565
#else
7566
0
    sp_int_digit l = 0;
7567
0
    sp_int_digit h = 0;
7568
0
    sp_int_digit t = 0;
7569
0
#endif
7570
7571
#ifdef SP_MATH_NEED_ADD_OFF
7572
    unsigned int j;
7573
7574
    /* Copy a into result up to offset. */
7575
    for (; (i < o) && (i < a->used); i++) {
7576
        r->dp[i] = a->dp[i];
7577
    }
7578
    /* Set result to 0 for digits beyond those in a. */
7579
    for (; i < o; i++) {
7580
        r->dp[i] = 0;
7581
    }
7582
7583
    /* Add each digit from a and b where both have values. */
7584
    for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
7585
    #ifndef SQR_MUL_ASM
7586
        t += a->dp[i];
7587
        t += b->dp[j];
7588
        r->dp[i] = (sp_int_digit)t;
7589
        t >>= SP_WORD_SIZE;
7590
    #else
7591
        t = a->dp[i];
7592
        SP_ASM_ADDC(l, h, t);
7593
        t = b->dp[j];
7594
        SP_ASM_ADDC(l, h, t);
7595
        r->dp[i] = l;
7596
        l = h;
7597
        h = 0;
7598
    #endif
7599
    }
7600
    /* Either a and/or b are out of digits. Add carry and remaining a digits. */
7601
    for (; i < a->used; i++) {
7602
    #ifndef SQR_MUL_ASM
7603
        t += a->dp[i];
7604
        r->dp[i] = (sp_int_digit)t;
7605
        t >>= SP_WORD_SIZE;
7606
    #else
7607
        t = a->dp[i];
7608
        SP_ASM_ADDC(l, h, t);
7609
        r->dp[i] = l;
7610
        l = h;
7611
        h = 0;
7612
    #endif
7613
    }
7614
    /* a is out of digits. Add carry and remaining b digits. */
7615
    for (; j < b->used; i++, j++) {
7616
    #ifndef SQR_MUL_ASM
7617
        t += b->dp[j];
7618
        r->dp[i] = (sp_int_digit)t;
7619
        t >>= SP_WORD_SIZE;
7620
    #else
7621
        t = b->dp[j];
7622
        SP_ASM_ADDC(l, h, t);
7623
        r->dp[i] = l;
7624
        l = h;
7625
        h = 0;
7626
    #endif
7627
    }
7628
#else
7629
0
    (void)o;
7630
7631
    /* Add each digit from a and b where both have values. */
7632
0
    for (; (i < a->used) && (i < b->used); i++) {
7633
    #ifndef SQR_MUL_ASM
7634
        t += a->dp[i];
7635
        t += b->dp[i];
7636
        r->dp[i] = (sp_int_digit)t;
7637
        t >>= SP_WORD_SIZE;
7638
    #else
7639
0
        t = a->dp[i];
7640
0
        SP_ASM_ADDC(l, h, t);
7641
0
        t = b->dp[i];
7642
0
        SP_ASM_ADDC(l, h, t);
7643
0
        r->dp[i] = l;
7644
0
        l = h;
7645
0
        h = 0;
7646
0
    #endif
7647
0
    }
7648
    /* Either a and/or b are out of digits. Add carry and remaining a digits. */
7649
0
    for (; i < a->used; i++) {
7650
    #ifndef SQR_MUL_ASM
7651
        t += a->dp[i];
7652
        r->dp[i] = (sp_int_digit)t;
7653
        t >>= SP_WORD_SIZE;
7654
    #else
7655
0
        t = a->dp[i];
7656
0
        SP_ASM_ADDC(l, h, t);
7657
0
        r->dp[i] = l;
7658
0
        l = h;
7659
0
        h = 0;
7660
0
    #endif
7661
0
    }
7662
    /* a is out of digits. Add carry and remaining b digits. */
7663
0
    for (; i < b->used; i++) {
7664
    #ifndef SQR_MUL_ASM
7665
        t += b->dp[i];
7666
        r->dp[i] = (sp_int_digit)t;
7667
        t >>= SP_WORD_SIZE;
7668
    #else
7669
0
        t = b->dp[i];
7670
0
        SP_ASM_ADDC(l, h, t);
7671
0
        r->dp[i] = l;
7672
0
        l = h;
7673
0
        h = 0;
7674
0
    #endif
7675
0
    }
7676
0
#endif
7677
7678
    /* Set used based on last digit put in. */
7679
0
    r->used = i;
7680
    /* Put in carry. */
7681
#ifndef SQR_MUL_ASM
7682
    r->dp[i] = (sp_int_digit)t;
7683
    r->used = (sp_size_t)(r->used + (sp_size_t)(t != 0));
7684
#else
7685
0
    r->dp[i] = l;
7686
0
    r->used = (sp_size_t)(r->used + (sp_size_t)(l != 0));
7687
0
#endif
7688
7689
    /* Remove leading zeros. */
7690
0
    sp_clamp(r);
7691
0
}
7692
#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
7693
7694
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_SP_INT_NEGATIVE) || \
7695
    !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
7696
    !defined(WOLFSSL_RSA_VERIFY_ONLY))
7697
/* Sub offset b from a into r: r = a - (b << (o * SP_WORD_SIZE))
7698
 * a must be greater than b.
7699
 *
7700
 * When using offset, r == a is faster.
7701
 *
7702
 * @param [in]  a  SP integer to subtract from.
7703
 * @param [in]  b  SP integer to subtract.
7704
 * @param [out] r  SP integer to store result in.
7705
 * @param [in]  o  Number of digits to offset b.
7706
 */
7707
static void _sp_sub_off(const sp_int* a, const sp_int* b, sp_int* r,
7708
    sp_size_t o)
7709
0
{
7710
0
    sp_size_t i = 0;
7711
0
    sp_size_t j;
7712
#ifndef SQR_MUL_ASM
7713
    sp_int_sword t = 0;
7714
#else
7715
0
    sp_int_digit l = 0;
7716
0
    sp_int_digit h = 0;
7717
0
#endif
7718
7719
    /* Need to copy digits up to offset into result. */
7720
0
    if (r != a) {
7721
0
        for (; (i < o) && (i < a->used); i++) {
7722
0
            r->dp[i] = a->dp[i];
7723
0
        }
7724
0
    }
7725
0
    else {
7726
0
        i = o;
7727
0
    }
7728
    /* Index to sub at is the offset now. */
7729
7730
0
    for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
7731
    #ifndef SQR_MUL_ASM
7732
        /* Add a into and subtract b from current value. */
7733
        t += a->dp[i];
7734
        t -= b->dp[j];
7735
        /* Store low digit in result. */
7736
        r->dp[i] = (sp_int_digit)t;
7737
        /* Move high digit down. */
7738
        t >>= SP_WORD_SIZE;
7739
    #else
7740
        /* Add a into and subtract b from current value. */
7741
0
        SP_ASM_ADDC(l, h, a->dp[i]);
7742
0
        SP_ASM_SUBB(l, h, b->dp[j]);
7743
        /* Store low digit in result. */
7744
0
        r->dp[i] = l;
7745
        /* Move high digit down. */
7746
0
        l = h;
7747
        /* High digit is 0 when positive or -1 on negative. */
7748
0
        h = (sp_int_digit)0 - (h >> (SP_WORD_SIZE - 1));
7749
0
    #endif
7750
0
    }
7751
0
    for (; i < a->used; i++) {
7752
    #ifndef SQR_MUL_ASM
7753
        /* Add a into current value. */
7754
        t += a->dp[i];
7755
        /* Store low digit in result. */
7756
        r->dp[i] = (sp_int_digit)t;
7757
        /* Move high digit down. */
7758
        t >>= SP_WORD_SIZE;
7759
    #else
7760
        /* Add a into current value. */
7761
0
        SP_ASM_ADDC(l, h, a->dp[i]);
7762
        /* Store low digit in result. */
7763
0
        r->dp[i] = l;
7764
        /* Move high digit down. */
7765
0
        l = h;
7766
        /* High digit is 0 when positive or -1 on negative. */
7767
0
        h = (sp_int_digit)0 - (h >> (SP_WORD_SIZE - 1));
7768
0
    #endif
7769
0
    }
7770
7771
    /* Set used based on last digit put in. */
7772
0
    r->used = i;
7773
    /* Remove leading zeros. */
7774
0
    sp_clamp(r);
7775
0
}
7776
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_SP_INT_NEGATIVE || !NO_DH ||
7777
        * HAVE_ECC || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
7778
7779
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
7780
/* Add b to a into r: r = a + b
7781
 *
7782
 * @param [in]  a  SP integer to add to.
7783
 * @param [in]  b  SP integer to add.
7784
 * @param [out] r  SP integer to store result in.
7785
 *
7786
 * @return  MP_OKAY on success.
7787
 * @return  MP_VAL when a, b, or r is NULL.
7788
 */
7789
int sp_add(const sp_int* a, const sp_int* b, sp_int* r)
7790
0
{
7791
0
    int err = MP_OKAY;
7792
7793
    /* Validate parameters. */
7794
0
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
7795
0
        err = MP_VAL;
7796
0
    }
7797
    /* Check that r is as big as a and b plus one word. */
7798
0
    if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
7799
0
        err = MP_VAL;
7800
0
    }
7801
7802
0
    if (err == MP_OKAY) {
7803
0
    #ifndef WOLFSSL_SP_INT_NEGATIVE
7804
        /* Add two positive numbers. */
7805
0
        _sp_add_off(a, b, r, 0);
7806
    #else
7807
        /* Same sign then add absolute values and use sign. */
7808
        if (a->sign == b->sign) {
7809
            _sp_add_off(a, b, r, 0);
7810
            r->sign = a->sign;
7811
        }
7812
        /* Different sign and abs(a) >= abs(b). */
7813
        else if (_sp_cmp_abs(a, b) != MP_LT) {
7814
            /* Subtract absolute values and use sign of a unless result 0. */
7815
            _sp_sub_off(a, b, r, 0);
7816
            if (sp_iszero(r)) {
7817
                r->sign = MP_ZPOS;
7818
            }
7819
            else {
7820
                r->sign = a->sign;
7821
            }
7822
        }
7823
        /* Different sign and abs(a) < abs(b). */
7824
        else {
7825
            /* Reverse subtract absolute values and use sign of b. */
7826
            _sp_sub_off(b, a, r, 0);
7827
            r->sign = b->sign;
7828
        }
7829
    #endif
7830
0
    }
7831
7832
0
    return err;
7833
0
}
7834
#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
7835
7836
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
7837
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
7838
/* Subtract b from a into r: r = a - b
7839
 *
7840
 * a must be greater than b unless WOLFSSL_SP_INT_NEGATIVE is defined.
7841
 *
7842
 * @param [in]  a  SP integer to subtract from.
7843
 * @param [in]  b  SP integer to subtract.
7844
 * @param [out] r  SP integer to store result in.
7845
 *
7846
 * @return  MP_OKAY on success.
7847
 * @return  MP_VAL when a, b, or r is NULL.
7848
 */
7849
int sp_sub(const sp_int* a, const sp_int* b, sp_int* r)
7850
0
{
7851
0
    int err = MP_OKAY;
7852
7853
    /* Validate parameters. */
7854
0
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
7855
0
        err = MP_VAL;
7856
0
    }
7857
#ifdef WOLFSSL_SP_INT_NEGATIVE
7858
    /* Check that r is as big as a and b plus one word. */
7859
    if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
7860
        err = MP_VAL;
7861
    }
7862
#else
7863
    /* Check that r is as big as a and b. */
7864
0
    if ((err == MP_OKAY) && ((a->used > r->size) || (b->used > r->size))) {
7865
0
        err = MP_VAL;
7866
0
    }
7867
0
#endif
7868
7869
0
    if (err == MP_OKAY) {
7870
0
    #ifndef WOLFSSL_SP_INT_NEGATIVE
7871
        /* Subtract positive numbers b from a. */
7872
0
        _sp_sub_off(a, b, r, 0);
7873
    #else
7874
        /* Different sign. */
7875
        if (a->sign != b->sign) {
7876
            /* Add absolute values and use sign of a. */
7877
            _sp_add_off(a, b, r, 0);
7878
            r->sign = a->sign;
7879
        }
7880
        /* Same sign and abs(a) >= abs(b). */
7881
        else if (_sp_cmp_abs(a, b) != MP_LT) {
7882
            /* Subtract absolute values and use sign of a unless result 0. */
7883
            _sp_sub_off(a, b, r, 0);
7884
            if (sp_iszero(r)) {
7885
                r->sign = MP_ZPOS;
7886
            }
7887
            else {
7888
                r->sign = a->sign;
7889
            }
7890
        }
7891
        /* Same sign and abs(a) < abs(b). */
7892
        else {
7893
            /* Reverse subtract absolute values and use opposite sign of a */
7894
            _sp_sub_off(b, a, r, 0);
7895
            r->sign = 1 - a->sign;
7896
        }
7897
    #endif
7898
0
    }
7899
7900
0
    return err;
7901
0
}
7902
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
7903
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY)*/
7904
7905
/****************************
7906
 * Add/Subtract mod functions
7907
 ****************************/
7908
7909
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
7910
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_CUSTOM_CURVES)) || \
7911
    defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE)
7912
/* Add two values and reduce: r = (a + b) % m
7913
 *
7914
 * @param [in]  a  SP integer to add.
7915
 * @param [in]  b  SP integer to add with.
7916
 * @param [in]  m  SP integer that is the modulus.
7917
 * @param [out] r  SP integer to hold result.
7918
 *
7919
 * @return  MP_OKAY on success.
7920
 * @return  MP_MEM when dynamic memory allocation fails.
7921
 */
7922
static int _sp_addmod(const sp_int* a, const sp_int* b, const sp_int* m,
7923
    sp_int* r)
7924
0
{
7925
0
    int err = MP_OKAY;
7926
    /* Calculate used based on digits used in a and b. */
7927
0
    sp_size_t used = (sp_size_t)(((a->used >= b->used) ? a->used + 1U :
7928
0
                                                         b->used + 1U));
7929
0
    DECL_SP_INT(t, used);
7930
7931
    /* Allocate a temporary SP int to hold sum. */
7932
0
    ALLOC_SP_INT_SIZE(t, used, err, NULL);
7933
7934
0
    if (err == MP_OKAY) {
7935
        /* Do sum. */
7936
0
        err = sp_add(a, b, t);
7937
0
    }
7938
0
    if (err == MP_OKAY) {
7939
        /* Mod result. */
7940
0
        err = sp_mod(t, m, r);
7941
0
    }
7942
7943
0
    FREE_SP_INT(t, NULL);
7944
0
    return err;
7945
0
}
7946
7947
/* Add two values and reduce: r = (a + b) % m
7948
 *
7949
 * @param [in]  a  SP integer to add.
7950
 * @param [in]  b  SP integer to add with.
7951
 * @param [in]  m  SP integer that is the modulus.
7952
 * @param [out] r  SP integer to hold result.
7953
 *
7954
 * @return  MP_OKAY on success.
7955
 * @return  MP_VAL when a, b, m or r is NULL.
7956
 * @return  MP_MEM when dynamic memory allocation fails.
7957
 */
7958
int sp_addmod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
7959
0
{
7960
0
    int err = MP_OKAY;
7961
7962
    /* Validate parameters. */
7963
0
    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
7964
0
        err = MP_VAL;
7965
0
    }
7966
    /* Ensure a and b aren't too big a number to operate on. */
7967
0
    else if (a->used >= SP_INT_DIGITS) {
7968
0
        err = MP_VAL;
7969
0
    }
7970
0
    else if (b->used >= SP_INT_DIGITS) {
7971
0
        err = MP_VAL;
7972
0
    }
7973
7974
7975
#if 0
7976
    if (err == MP_OKAY) {
7977
        sp_print(a, "a");
7978
        sp_print(b, "b");
7979
        sp_print(m, "m");
7980
    }
7981
#endif
7982
0
    if (err == MP_OKAY) {
7983
        /* Do add and modular reduction. */
7984
0
        err = _sp_addmod(a, b, m, r);
7985
0
    }
7986
#if 0
7987
    if (err == MP_OKAY) {
7988
        sp_print(r, "rma");
7989
    }
7990
#endif
7991
7992
0
    return err;
7993
0
}
7994
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_CUSTOM_CURVES ||
7995
        * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
7996
7997
#if defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
7998
    defined(HAVE_ECC))
7999
/* Sub b from a and reduce: r = (a - b) % m
8000
 * Result is always positive.
8001
 *
8002
 * @param [in]  a  SP integer to subtract from.
8003
 * @param [in]  b  SP integer to subtract.
8004
 * @param [in]  m  SP integer that is the modulus.
8005
 * @param [out] r  SP integer to hold result.
8006
 *
8007
 * @return  MP_OKAY on success.
8008
 * @return  MP_MEM when dynamic memory allocation fails.
8009
 */
8010
static int _sp_submod(const sp_int* a, const sp_int* b, const sp_int* m,
8011
    sp_int* r)
8012
0
{
8013
0
    int err = MP_OKAY;
8014
0
#ifndef WOLFSSL_SP_INT_NEGATIVE
8015
0
    unsigned int used = ((a->used >= m->used) ?
8016
0
        ((a->used >= b->used) ? (a->used + 1U) : (b->used + 1U)) :
8017
0
        ((b->used >= m->used)) ? (b->used + 1U) : (m->used + 1U));
8018
0
    DECL_SP_INT(t0, used);
8019
0
    DECL_SP_INT(t1, used);
8020
8021
0
    ALLOC_SP_INT_SIZE(t0, used, err, NULL);
8022
0
    ALLOC_SP_INT_SIZE(t1, used, err, NULL);
8023
0
    if (err == MP_OKAY) {
8024
        /* Reduce a to less than m. */
8025
0
        if (_sp_cmp(a, m) != MP_LT) {
8026
0
            err = sp_mod(a, m, t0);
8027
0
            a = t0;
8028
0
        }
8029
0
    }
8030
0
    if (err == MP_OKAY) {
8031
        /* Reduce b to less than m. */
8032
0
        if (_sp_cmp(b, m) != MP_LT) {
8033
0
            err = sp_mod(b, m, t1);
8034
0
            b = t1;
8035
0
        }
8036
0
    }
8037
0
    if (err == MP_OKAY) {
8038
        /* Add m to a if a smaller than b. */
8039
0
        if (_sp_cmp(a, b) == MP_LT) {
8040
0
            err = sp_add(a, m, t0);
8041
0
            a = t0;
8042
0
        }
8043
0
    }
8044
0
    if (err == MP_OKAY) {
8045
        /* Subtract b from a. */
8046
0
        err = sp_sub(a, b, r);
8047
0
    }
8048
8049
0
    FREE_SP_INT(t0, NULL);
8050
0
    FREE_SP_INT(t1, NULL);
8051
#else /* WOLFSSL_SP_INT_NEGATIVE */
8052
    sp_size_t used = ((a->used >= b->used) ? a->used + 1 : b->used + 1);
8053
    DECL_SP_INT(t, used);
8054
8055
    ALLOC_SP_INT_SIZE(t, used, err, NULL);
8056
    /* Subtract b from a into temporary. */
8057
    if (err == MP_OKAY) {
8058
        err = sp_sub(a, b, t);
8059
    }
8060
    if (err == MP_OKAY) {
8061
        /* Reduce result mod m into r. */
8062
        err = sp_mod(t, m, r);
8063
    }
8064
    FREE_SP_INT(t, NULL);
8065
#endif /* WOLFSSL_SP_INT_NEGATIVE */
8066
8067
0
    return err;
8068
0
}
8069
8070
/* Sub b from a and reduce: r = (a - b) % m
8071
 * Result is always positive.
8072
 *
8073
 * @param [in]  a  SP integer to subtract from.
8074
 * @param [in]  b  SP integer to subtract.
8075
 * @param [in]  m  SP integer that is the modulus.
8076
 * @param [out] r  SP integer to hold result.
8077
 *
8078
 * @return  MP_OKAY on success.
8079
 * @return  MP_VAL when a, b, m or r is NULL.
8080
 * @return  MP_MEM when dynamic memory allocation fails.
8081
 */
8082
int sp_submod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
8083
0
{
8084
0
    int err = MP_OKAY;
8085
    /* Validate parameters. */
8086
0
    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
8087
0
        err = MP_VAL;
8088
0
    }
8089
    /* Ensure a, b and m aren't too big a number to operate on. */
8090
0
    else if (a->used >= SP_INT_DIGITS) {
8091
0
        err = MP_VAL;
8092
0
    }
8093
0
    else if (b->used >= SP_INT_DIGITS) {
8094
0
        err = MP_VAL;
8095
0
    }
8096
0
    else if (m->used >= SP_INT_DIGITS) {
8097
0
        err = MP_VAL;
8098
0
    }
8099
8100
#if 0
8101
    if (err == MP_OKAY) {
8102
        sp_print(a, "a");
8103
        sp_print(b, "b");
8104
        sp_print(m, "m");
8105
    }
8106
#endif
8107
0
    if (err == MP_OKAY) {
8108
        /* Do submod. */
8109
0
        err = _sp_submod(a, b, m, r);
8110
0
    }
8111
#if 0
8112
    if (err == MP_OKAY) {
8113
        sp_print(r, "rms");
8114
    }
8115
#endif
8116
8117
0
    return err;
8118
0
}
8119
#endif /* WOLFSSL_SP_MATH_ALL */
8120
8121
/* Constant time clamping.
8122
 *
8123
 * @param [in, out] a  SP integer to clamp.
8124
 */
8125
static void sp_clamp_ct(sp_int* a)
8126
0
{
8127
0
    int i;
8128
0
    sp_size_t used = a->used;
8129
0
    volatile sp_size_t mask = (sp_size_t)-1;
8130
8131
0
    for (i = (int)a->used - 1; i >= 0; i--) {
8132
#if ((SP_WORD_SIZE == 64) && \
8133
     (defined(_WIN64) || !defined(WOLFSSL_UINT128_T_DEFINED))) || \
8134
    ((SP_WORD_SIZE == 32) && defined(NO_64BIT))
8135
        sp_int_digit negVal = ~a->dp[i];
8136
        sp_int_digit minusOne = a->dp[i] - 1;
8137
        sp_int_digit zeroMask =
8138
            (sp_int_digit)((sp_int_sdigit)(negVal & minusOne) >>
8139
                           (SP_WORD_SIZE - 1));
8140
#else
8141
0
        sp_size_t zeroMask =
8142
0
            (sp_size_t)((((sp_int_sword)a->dp[i]) - 1) >> SP_WORD_SIZE);
8143
0
#endif
8144
0
        mask &= (sp_size_t)zeroMask;
8145
0
        used = (sp_size_t)(used + mask);
8146
0
    }
8147
0
    a->used = used;
8148
0
}
8149
8150
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
8151
/* Add two values and reduce: r = (a + b) % m
8152
 *
8153
 * r = a + b (mod m) - constant time (a < m and b < m, a, b and m are positive)
8154
 *
8155
 * Assumes a, b, m and r are not NULL.
8156
 * m and r must not be the same pointer.
8157
 *
8158
 * @param [in]  a  SP integer to add.
8159
 * @param [in]  b  SP integer to add with.
8160
 * @param [in]  m  SP integer that is the modulus.
8161
 * @param [out] r  SP integer to hold result.
8162
 *
8163
 * @return  MP_OKAY on success.
8164
 */
8165
int sp_addmod_ct(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
8166
0
{
8167
0
    int err = MP_OKAY;
8168
#ifndef SQR_MUL_ASM
8169
    sp_int_sword w;
8170
    sp_int_sword s;
8171
#else
8172
0
    sp_int_digit wl;
8173
0
    sp_int_digit wh;
8174
0
    sp_int_digit sl;
8175
0
    sp_int_digit sh;
8176
0
    sp_int_digit t;
8177
0
#endif
8178
0
    volatile sp_int_digit mask;
8179
0
    volatile sp_int_digit mask_a = (sp_int_digit)-1;
8180
0
    volatile sp_int_digit mask_b = (sp_int_digit)-1;
8181
0
    sp_size_t i;
8182
8183
    /* Check result is as big as modulus. */
8184
0
    if (m->used > r->size) {
8185
0
        err = MP_VAL;
8186
0
    }
8187
    /* Validate parameters. */
8188
0
    if ((err == MP_OKAY) && (r == m)) {
8189
0
        err = MP_VAL;
8190
0
    }
8191
8192
0
    if (err == MP_OKAY) {
8193
#if 0
8194
        sp_print(a, "a");
8195
        sp_print(b, "b");
8196
        sp_print(m, "m");
8197
#endif
8198
8199
        /* Add a to b into r. Do the subtract of modulus but don't store result.
8200
         * When subtract result is negative, the overflow will be negative.
8201
         * Only need to subtract mod when result is positive - overflow is
8202
         * positive.
8203
         */
8204
    #ifndef SQR_MUL_ASM
8205
        w = 0;
8206
        s = 0;
8207
    #else
8208
0
        wl = 0;
8209
0
        sl = 0;
8210
0
        sh = 0;
8211
0
    #endif
8212
        /* Constant time - add modulus digits worth from a and b. */
8213
0
        for (i = 0; i < m->used; i++) {
8214
            /* Values past 'used' are not initialized. */
8215
0
            mask_a += (i == a->used);
8216
0
            mask_b += (i == b->used);
8217
8218
        #ifndef SQR_MUL_ASM
8219
            /* Add next digits from a and b to current value. */
8220
            w         += a->dp[i] & mask_a;
8221
            w         += b->dp[i] & mask_b;
8222
            /* Store low digit in result. */
8223
            r->dp[i]   = (sp_int_digit)w;
8224
            /* Add result to reducing value. */
8225
            s         += (sp_int_digit)w;
8226
            /* Subtract next digit of modulus. */
8227
            s         -= m->dp[i];
8228
            /* Move high digit of reduced result down. */
8229
            s        >>= DIGIT_BIT;
8230
            /* Move high digit of sum result down. */
8231
            w        >>= DIGIT_BIT;
8232
        #else
8233
0
            wh = 0;
8234
            /* Add next digits from a and b to current value. */
8235
0
            t = a->dp[i] & mask_a;
8236
0
            SP_ASM_ADDC_REG(wl, wh, t);
8237
0
            t = b->dp[i] & mask_b;
8238
0
            SP_ASM_ADDC_REG(wl, wh, t);
8239
            /* Store low digit in result. */
8240
0
            r->dp[i] = wl;
8241
            /* Add result to reducing value. */
8242
0
            SP_ASM_ADDC_REG(sl, sh, wl);
8243
            /* Subtract next digit of modulus. */
8244
0
            SP_ASM_SUBB(sl, sh, m->dp[i]);
8245
            /* Move high digit of reduced result down. */
8246
0
            sl = sh;
8247
            /* High digit is 0 when positive or -1 on negative. */
8248
0
            sh = (sp_int_digit)0 - (sh >> (SP_WORD_SIZE-1));
8249
            /* Move high digit of sum result down. */
8250
0
            wl = wh;
8251
0
        #endif
8252
0
        }
8253
    #ifndef SQR_MUL_ASM
8254
        /* Add carry into reduced result. */
8255
        s += (sp_int_digit)w;
8256
        /* s will be positive when subtracting modulus is needed. */
8257
        mask = (sp_int_digit)0 - (s >= 0);
8258
    #else
8259
        /* Add carry into reduced result. */
8260
0
        SP_ASM_ADDC_REG(sl, sh, wl);
8261
        /* s will be positive when subtracting modulus is needed. */
8262
0
        mask = (sh >> (SP_WORD_SIZE-1)) - 1;
8263
0
    #endif
8264
8265
        /* Constant time, conditionally, subtract modulus from sum. */
8266
    #ifndef SQR_MUL_ASM
8267
        w = 0;
8268
    #else
8269
0
        wl = 0;
8270
0
        wh = 0;
8271
0
    #endif
8272
0
        for (i = 0; i < m->used; i++) {
8273
        #ifndef SQR_MUL_ASM
8274
            /* Add result to current value and conditionally subtract modulus.
8275
             */
8276
            w         += r->dp[i];
8277
            w         -= m->dp[i] & mask;
8278
            /* Store low digit in result. */
8279
            r->dp[i]   = (sp_int_digit)w;
8280
            /* Move high digit of sum result down. */
8281
            w        >>= DIGIT_BIT;
8282
        #else
8283
            /* Add result to current value and conditionally subtract modulus.
8284
             */
8285
0
            SP_ASM_ADDC(wl, wh, r->dp[i]);
8286
0
            t = m->dp[i] & mask;
8287
0
            SP_ASM_SUBB_REG(wl, wh, t);
8288
            /* Store low digit in result. */
8289
0
            r->dp[i] = wl;
8290
            /* Move high digit of sum result down. */
8291
0
            wl = wh;
8292
            /* High digit is 0 when positive or -1 on negative. */
8293
0
            wh = (sp_int_digit)0 - (wl >> (SP_WORD_SIZE-1));
8294
0
        #endif
8295
0
        }
8296
        /* Result will always have digits equal to or less than those in
8297
         * modulus. */
8298
0
        r->used = i;
8299
    #ifdef WOLFSSL_SP_INT_NEGATIVE
8300
        r->sign = MP_ZPOS;
8301
    #endif /* WOLFSSL_SP_INT_NEGATIVE */
8302
        /* Remove leading zeros. */
8303
0
        sp_clamp_ct(r);
8304
8305
#if 0
8306
        sp_print(r, "rma");
8307
#endif
8308
0
    }
8309
8310
0
    return err;
8311
0
}
8312
#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
8313
8314
#if (defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)) || \
8315
    (defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
8316
     defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
8317
     defined(OPENSSL_ALL))
8318
/* Sub b from a modulo m: r = (a - b) % m
8319
 *
8320
 * Result is always positive.
8321
 *
8322
 * Assumes a, b, m and r are not NULL.
8323
 * m and r must not be the same pointer.
8324
 *
8325
 * @param [in]  a         SP integer to subtract from.
8326
 * @param [in]  b         SP integer to subtract.
8327
 * @param [in]  m         SP integer that is the modulus.
8328
 * @param [in]  max_size  Maximum number of digits in a and b to use.
8329
 * @param [out] r         SP integer to hold result.
8330
 */
8331
static void _sp_submod_ct(const sp_int* a, const sp_int* b, const sp_int* m,
8332
    unsigned int max_size, sp_int* r)
8333
0
{
8334
#ifndef SQR_MUL_ASM
8335
    sp_int_sword w;
8336
#else
8337
0
    sp_int_digit l;
8338
0
    sp_int_digit h;
8339
0
    sp_int_digit t;
8340
0
#endif
8341
0
    volatile sp_int_digit mask;
8342
0
    volatile sp_int_digit mask_a = (sp_int_digit)-1;
8343
0
    volatile sp_int_digit mask_b = (sp_int_digit)-1;
8344
0
    unsigned int i;
8345
8346
    /* In constant time, subtract b from a putting result in r. */
8347
#ifndef SQR_MUL_ASM
8348
    w = 0;
8349
#else
8350
0
    l = 0;
8351
0
    h = 0;
8352
0
#endif
8353
0
    for (i = 0; i < max_size; i++) {
8354
        /* Values past 'used' are not initialized. */
8355
0
        mask_a += (i == a->used);
8356
0
        mask_b += (i == b->used);
8357
8358
    #ifndef SQR_MUL_ASM
8359
        /* Add a to and subtract b from current value. */
8360
        w         += a->dp[i] & mask_a;
8361
        w         -= b->dp[i] & mask_b;
8362
        /* Store low digit in result. */
8363
        r->dp[i]   = (sp_int_digit)w;
8364
        /* Move high digit down. */
8365
        w        >>= DIGIT_BIT;
8366
    #else
8367
        /* Add a and subtract b from current value. */
8368
0
        t = a->dp[i] & mask_a;
8369
0
        SP_ASM_ADDC_REG(l, h, t);
8370
0
        t = b->dp[i] & mask_b;
8371
0
        SP_ASM_SUBB_REG(l, h, t);
8372
        /* Store low digit in result. */
8373
0
        r->dp[i] = l;
8374
        /* Move high digit down. */
8375
0
        l = h;
8376
        /* High digit is 0 when positive or -1 on negative. */
8377
0
        h = (sp_int_digit)0 - (l >> (SP_WORD_SIZE - 1));
8378
0
    #endif
8379
0
    }
8380
    /* When w is negative then we need to add modulus to make result
8381
     * positive. */
8382
#ifndef SQR_MUL_ASM
8383
    mask = (sp_int_digit)0 - (w < 0);
8384
#else
8385
0
    mask = h;
8386
0
#endif
8387
8388
    /* Constant time, conditionally, add modulus to difference. */
8389
#ifndef SQR_MUL_ASM
8390
    w = 0;
8391
#else
8392
0
    l = 0;
8393
0
#endif
8394
0
    for (i = 0; i < m->used; i++) {
8395
    #ifndef SQR_MUL_ASM
8396
        /* Add result and conditionally modulus to current value. */
8397
        w         += r->dp[i];
8398
        w         += m->dp[i] & mask;
8399
        /* Store low digit in result. */
8400
        r->dp[i]   = (sp_int_digit)w;
8401
        /* Move high digit down. */
8402
        w        >>= DIGIT_BIT;
8403
    #else
8404
0
        h = 0;
8405
        /* Add result and conditionally modulus to current value. */
8406
0
        SP_ASM_ADDC(l, h, r->dp[i]);
8407
0
        t = m->dp[i] & mask;
8408
0
        SP_ASM_ADDC_REG(l, h, t);
8409
        /* Store low digit in result. */
8410
0
        r->dp[i] = l;
8411
        /* Move high digit down. */
8412
0
        l = h;
8413
0
    #endif
8414
0
    }
8415
    /* Result will always have digits equal to or less than those in
8416
     * modulus. */
8417
0
    r->used = (sp_size_t)i;
8418
#ifdef WOLFSSL_SP_INT_NEGATIVE
8419
    r->sign = MP_ZPOS;
8420
#endif /* WOLFSSL_SP_INT_NEGATIVE */
8421
    /* Remove leading zeros. */
8422
0
    sp_clamp_ct(r);
8423
0
}
8424
#endif
8425
8426
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
8427
/* Sub b from a modulo m: r = (a - b) % m
8428
 * Result is always positive.
8429
 *
8430
 * r = a - b (mod m) - constant time (a < m and b < m, a, b and m are positive)
8431
 *
8432
 * Assumes a, b, m and r are not NULL.
8433
 * m and r must not be the same pointer.
8434
 *
8435
 * @param [in]  a  SP integer to subtract from.
8436
 * @param [in]  b  SP integer to subtract.
8437
 * @param [in]  m  SP integer that is the modulus.
8438
 * @param [out] r  SP integer to hold result.
8439
 *
8440
 * @return  MP_OKAY on success.
8441
 */
8442
int sp_submod_ct(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
8443
0
{
8444
0
    int err = MP_OKAY;
8445
8446
    /* Check result is as big as modulus. */
8447
0
    if (m->used > r->size) {
8448
0
        err = MP_VAL;
8449
0
    }
8450
    /* Validate parameters. */
8451
0
    if ((err == MP_OKAY) && (r == m)) {
8452
0
        err = MP_VAL;
8453
0
    }
8454
8455
0
    if (err == MP_OKAY) {
8456
#if 0
8457
        sp_print(a, "a");
8458
        sp_print(b, "b");
8459
        sp_print(m, "m");
8460
#endif
8461
8462
0
        _sp_submod_ct(a, b, m, m->used, r);
8463
8464
#if 0
8465
        sp_print(r, "rms");
8466
#endif
8467
0
    }
8468
8469
0
    return err;
8470
0
}
8471
#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
8472
8473
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC) && \
8474
    defined(WOLFSSL_ECC_BLIND_K)
8475
/* XOR a and b into r in constant time. r = a ^ b.
8476
 *
8477
 * Assumes a, b and r have len bytes.
8478
 *
8479
 * @param [in]  a    First SP integer to XOR.
8480
 * @param [in]  b    Second SP integer to XOR.
8481
 * @param [in]  len  Number of bytes to XOR.
8482
 * @param [out] r    SP integer to hold result.
8483
 */
8484
void sp_xor_ct(const sp_int* a, const sp_int* b, int len, sp_int* r)
8485
{
8486
    if ((a != NULL) && (b != NULL) && (r != NULL)) {
8487
        unsigned int i;
8488
8489
        r->used = (len * 8 + SP_WORD_SIZE - 1) / SP_WORD_SIZE;
8490
        for (i = 0; i < r->used; i++) {
8491
            r->dp[i] = a->dp[i] ^ b->dp[i];
8492
        }
8493
        i = (len * 8) % SP_WORD_SIZE;
8494
        if (i > 0) {
8495
            r->dp[r->used - 1] &= ((sp_int_digit)1 << i) - 1;
8496
        }
8497
        /* Remove leading zeros. */
8498
        sp_clamp_ct(r);
8499
    }
8500
}
8501
#endif
8502
8503
/********************
8504
 * Shifting functions
8505
 ********************/
8506
8507
#if !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
8508
    defined(WC_RSA_BLINDING) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
8509
/* Left shift the multi-precision number by a number of digits.
8510
 *
8511
 * @param [in, out] a  SP integer to shift.
8512
 * @param [in]      s  Number of digits to shift.
8513
 *
8514
 * @return  MP_OKAY on success.
8515
 * @return  MP_VAL when a is NULL, s is negative or the result is too big.
8516
 */
8517
int sp_lshd(sp_int* a, int s)
8518
0
{
8519
0
    int err = MP_OKAY;
8520
8521
    /* Validate parameters. */
8522
0
    if ((a == NULL) || (s < 0)) {
8523
0
        err = MP_VAL;
8524
0
    }
8525
    /* Ensure number has enough digits for operation. */
8526
0
    if ((err == MP_OKAY) && (a->used + (unsigned int)s > a->size)) {
8527
0
        err = MP_VAL;
8528
0
    }
8529
0
    if (err == MP_OKAY) {
8530
        /* Move up digits. */
8531
0
        XMEMMOVE(a->dp + s, a->dp, a->used * (word32)SP_WORD_SIZEOF);
8532
        /* Back fill with zeros. */
8533
0
        XMEMSET(a->dp, 0, (size_t)s * SP_WORD_SIZEOF);
8534
        /* Update used. */
8535
0
        a->used = (sp_size_t)(a->used + s);
8536
        /* Remove leading zeros. */
8537
0
        sp_clamp(a);
8538
0
    }
8539
8540
0
    return err;
8541
0
}
8542
#endif
8543
8544
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
8545
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
8546
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
8547
/* Left shift the multi-precision number by n bits.
8548
 * Bits may be larger than the word size.
8549
 *
8550
 * Used by sp_mul_2d() and other internal functions.
8551
 *
8552
 * @param [in, out] a  SP integer to shift.
8553
 * @param [in]      n  Number of bits to shift left.
8554
 *
8555
 * @return  MP_OKAY on success.
8556
 * @return  MP_VAL when the result is too big.
8557
 */
8558
static int sp_lshb(sp_int* a, int n)
8559
0
{
8560
0
    int err = MP_OKAY;
8561
8562
0
    if (a->used != 0) {
8563
        /* Calculate number of digits to shift. */
8564
0
        sp_size_t s = (sp_size_t)n >> SP_WORD_SHIFT;
8565
        /* Get count of bits to move in digit. */
8566
0
        n &= (int)SP_WORD_MASK;
8567
8568
        /* Ensure number has enough digits for result. */
8569
0
        if ((n != 0) && (a->used + s >= a->size)) {
8570
0
            err = MP_VAL;
8571
0
        }
8572
0
        else if ((s > 0) && (a->used + s > a->size)) {
8573
0
            err = MP_VAL;
8574
0
        }
8575
0
        if (err == MP_OKAY) {
8576
            /* Check whether this is a complicated case. */
8577
0
            if (n != 0) {
8578
0
                unsigned int i;
8579
8580
                /* Shift up starting at most significant digit. */
8581
                /* Get new most significant digit. */
8582
0
                sp_int_digit v = a->dp[a->used - 1] >> (SP_WORD_SIZE - n);
8583
                /* Shift up each digit. */
8584
0
                for (i = a->used - 1U; i >= 1U; i--) {
8585
0
                    a->dp[i + s] = (a->dp[i] << n) |
8586
0
                                   (a->dp[i - 1] >> (SP_WORD_SIZE - n));
8587
0
                }
8588
                /* Shift up least significant digit. */
8589
0
                a->dp[s] = a->dp[0] << n;
8590
                /* Add new high digit unless zero. */
8591
0
                if (v != 0) {
8592
0
                    a->dp[a->used + s] = v;
8593
0
                    a->used++;
8594
0
                }
8595
0
            }
8596
            /* Only digits to move and ensure not zero. */
8597
0
            else if (s > 0) {
8598
                /* Move up digits. */
8599
0
                XMEMMOVE(a->dp + s, a->dp, a->used * (word32)SP_WORD_SIZEOF);
8600
0
            }
8601
8602
            /* Update used digit count. */
8603
0
            a->used = (sp_size_t)(a->used + s);
8604
            /* Back fill with zeros. */
8605
0
            XMEMSET(a->dp, 0, (word32)SP_WORD_SIZEOF * s);
8606
0
        }
8607
0
    }
8608
8609
0
    return err;
8610
0
}
8611
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
8612
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
8613
8614
#ifdef WOLFSSL_SP_MATH_ALL
8615
/* Shift a right by c digits: a = a >> (c * SP_WORD_SIZE)
8616
 *
8617
 * @param [in, out] a  SP integer to shift.
8618
 * @param [in]      c  Number of digits to shift.
8619
 */
8620
void sp_rshd(sp_int* a, int c)
8621
0
{
8622
    /* Do shift if we have an SP int. */
8623
0
    if ((a != NULL) && (c > 0)) {
8624
        /* Make zero if shift removes all digits. */
8625
0
        if ((sp_size_t)c >= a->used) {
8626
0
            _sp_zero(a);
8627
0
        }
8628
0
        else {
8629
0
            sp_size_t i;
8630
8631
            /* Update used digits count. */
8632
0
            a->used = (sp_size_t)(a->used - c);
8633
            /* Move digits down. */
8634
0
            for (i = 0; i < a->used; i++, c++) {
8635
0
                a->dp[i] = a->dp[c];
8636
0
            }
8637
0
        }
8638
0
    }
8639
0
}
8640
#endif /* WOLFSSL_SP_MATH_ALL */
8641
8642
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
8643
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
8644
    defined(WOLFSSL_HAVE_SP_DH)
8645
/* Shift a right by n bits into r: r = a >> n
8646
 *
8647
 * @param [in]  a  SP integer to shift.
8648
 * @param [in]  n  Number of bits to shift.
8649
 * @param [out] r  SP integer to store result in.
8650
 */
8651
int sp_rshb(const sp_int* a, int n, sp_int* r)
8652
0
{
8653
0
    int err = MP_OKAY;
8654
    /* Number of digits to shift down. */
8655
0
    sp_size_t i;
8656
8657
0
    if ((a == NULL) || (n < 0)) {
8658
0
        err = MP_VAL;
8659
0
    }
8660
    /* Handle case where shifting out all digits. */
8661
0
    else if ((i = (sp_size_t)(n >> SP_WORD_SHIFT)) >= a->used) {
8662
0
        _sp_zero(r);
8663
0
    }
8664
    /* Change callers when more error cases returned. */
8665
0
    else if ((err == MP_OKAY) && (a->used - i > r->size)) {
8666
0
        err = MP_VAL;
8667
0
    }
8668
0
    else if (err == MP_OKAY) {
8669
0
        sp_size_t j;
8670
8671
        /* Number of bits to shift in digits. */
8672
0
        n &= SP_WORD_SIZE - 1;
8673
        /* Handle simple case. */
8674
0
        if (n == 0) {
8675
            /* Set the count of used digits. */
8676
0
            r->used = (sp_size_t)(a->used - i);
8677
            /* Move digits down. */
8678
0
            if (r == a) {
8679
0
                XMEMMOVE(r->dp, r->dp + i, (word32)SP_WORD_SIZEOF * r->used);
8680
0
            }
8681
0
            else {
8682
0
                XMEMCPY(r->dp, a->dp + i, (word32)SP_WORD_SIZEOF * r->used);
8683
0
            }
8684
0
        }
8685
0
        else {
8686
            /* Move the bits down starting at least significant digit. */
8687
0
            for (j = 0; j < (sp_size_t)(a->used - 1 - i); j++)
8688
0
                r->dp[j] = (a->dp[j+i] >> n) |
8689
0
                    (a->dp[j+i+1] << (SP_WORD_SIZE - n));
8690
            /* Most significant digit has no higher digit to pull from. */
8691
0
            r->dp[j] = a->dp[j+i] >> n;
8692
            /* Set the count of used digits. */
8693
0
            r->used = (sp_size_t)(j + (r->dp[j] > 0));
8694
0
        }
8695
#ifdef WOLFSSL_SP_INT_NEGATIVE
8696
        if (sp_iszero(r)) {
8697
            /* Set zero sign. */
8698
            r->sign = MP_ZPOS;
8699
        }
8700
        else {
8701
            /* Retain sign. */
8702
            r->sign = a->sign;
8703
        }
8704
#endif
8705
0
    }
8706
8707
0
    return err;
8708
0
}
8709
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
8710
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || WOLFSSL_HAVE_SP_DH */
8711
8712
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
8713
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
8714
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
8715
/* Subtract d once from the top digits of a when those digits are at least d.
 *
 * The top d->used digits of a are compared with d from the most significant
 * digit down. When they are greater than or equal to d, the quotient digit
 * at index (a->used - d->used) is incremented and d is subtracted from a at
 * that digit offset. The used count of a is kept unchanged.
 *
 * @param [in, out] a  SP integer that is the dividend/remainder.
 * @param [in]      d  SP integer to divide by.
 * @param [in, out] r  SP integer that is the quotient.
 */
static void _sp_div_same_size(sp_int* a, const sp_int* d, sp_int* r)
{
    sp_size_t i = (sp_size_t)(d->used - 1U);

    /* Walk down while digits match; stop at first difference or index 0. */
    while ((i > 0) && (a->dp[a->used - d->used + i] == d->dp[i])) {
        i--;
    }

    /* Digit at the stopping index decides whether top of a is >= d. */
    if (a->dp[a->used - d->used + i] >= d->dp[i]) {
        /* Remember 'used' - subtraction may change it and the zeroed top
         * digits are to remain part of the number. */
        sp_size_t used = a->used;

        /* One more multiple of d at this offset goes into the quotient. */
        r->dp[a->used - d->used]++;
        /* Subtract d from top of a. */
        _sp_sub_off(a, d, a, (sp_size_t)(a->used - d->used));
        /* Restore 'used' on remainder. */
        a->used = used;
    }
}
8738
8739
/* Divide a by d and return the quotient in r and the remainder in a.
8740
 *   r = a / d; a = a % d
8741
 *
8742
 * Note: a is constantly having multiples of d subtracted.
8743
 *
8744
 * @param [in, out] a      SP integer to be divided and remainder on out.
8745
 * @param [in]      d      SP integer to divide by.
8746
 * @param [out]     r      SP integer that is the quotient.
8747
 * @param [out]     trial  SP integer that is product in trial division.
8748
 *
8749
 * @return  MP_OKAY on success.
8750
 * @return  MP_VAL when operation fails - only when compiling small code.
8751
 */
8752
static int _sp_div_impl(sp_int* a, const sp_int* d, sp_int* r, sp_int* trial)
8753
0
{
8754
0
    int err = MP_OKAY;
8755
0
    sp_size_t i;
8756
#ifdef WOLFSSL_SP_SMALL
8757
    int c;
8758
#else
8759
0
    sp_size_t j;
8760
0
    sp_size_t o;
8761
    #ifndef SQR_MUL_ASM
8762
    sp_int_sword sw;
8763
    #else
8764
0
    sp_int_digit sl;
8765
0
    sp_int_digit sh;
8766
0
    sp_int_digit st;
8767
0
    #endif
8768
0
#endif /* WOLFSSL_SP_SMALL */
8769
0
    sp_int_digit t;
8770
0
    sp_int_digit dt;
8771
8772
    /* Set result size to clear. */
8773
0
    r->used = (sp_size_t)(a->used - d->used + 1);
8774
    /* Set all potentially used digits to zero. */
8775
0
    for (i = 0; i < r->used; i++) {
8776
0
        r->dp[i] = 0;
8777
0
    }
8778
#ifdef WOLFSSL_SP_INT_NEGATIVE
8779
    r->sign = MP_ZPOS;
8780
#endif
8781
    /* Get the most significant digit (will have top bit set). */
8782
0
    dt = d->dp[d->used-1];
8783
8784
    /* Handle when a >= d ^ (2 ^ (SP_WORD_SIZE * x)). */
8785
0
    _sp_div_same_size(a, d, r);
8786
8787
    /* Keep subtracting multiples of d as long as the digit count of a is
8788
     * greater than equal to d.
8789
     */
8790
0
    for (i = (sp_size_t)(a->used - 1U); i >= d->used; i--) {
8791
        /* When top digits equal, guestimate maximum multiplier.
8792
         * Worst case, multiplier is actually SP_DIGIT_MAX - 1.
8793
         * That is, for w (word size in bits) > 1, n > 1, let:
8794
         *   a = 2^((n+1)*w-1), d = 2^(n*w-1) + 2^((n-1)*w) - 1, t = 2^w - 2
8795
         * Then,
8796
         *     d * t
8797
         *   = (2^(n*w-1) + 2^((n-1)*w) - 1) * (2^w - 2)
8798
         *   = 2^((n+1)*w-1) - 2^(n*w) + 2^(n*w) - 2^((n-1)*w+1) - 2^w + 2
8799
         *   = 2^((n+1)*w-1) - 2^((n-1)*w+1) - 2^w + 2
8800
         *   = a - 2^((n-1)*w+1) - 2^w + 2
8801
         * d > 2^((n-1)*w+1) + 2^w - 2, when w > 1, n > 1
8802
         */
8803
0
        if (a->dp[i] == dt) {
8804
0
            t = SP_DIGIT_MAX;
8805
0
        }
8806
0
        else {
8807
            /* Calculate trial quotient by dividing top word of dividend by top
8808
             * digit of divisor.
8809
             * Some implementations segfault when quotient > SP_DIGIT_MAX.
8810
             * Implementations in assembly, using builtins or using
8811
             * digits only (WOLFSSL_SP_DIV_WORD_HALF).
8812
             */
8813
0
            t = sp_div_word(a->dp[i], a->dp[i-1], dt);
8814
0
        }
8815
#ifdef WOLFSSL_SP_SMALL
8816
        do {
8817
            /* Calculate trial from trial quotient. */
8818
            err = _sp_mul_d(d, t, trial, i - d->used);
8819
            if (err != MP_OKAY) {
8820
                break;
8821
            }
8822
            /* Check if trial is bigger. */
8823
            c = _sp_cmp_abs(trial, a);
8824
            if (c == MP_GT) {
8825
                /* Decrement trial quotient and try again. */
8826
                t--;
8827
            }
8828
        }
8829
        while (c == MP_GT);
8830
8831
        if (err != MP_OKAY) {
8832
            break;
8833
        }
8834
8835
        /* Subtract the trial and add quotient to result. */
8836
        _sp_sub_off(a, trial, a, 0);
8837
        r->dp[i - d->used] += t;
8838
        /* Handle overflow of digit. */
8839
        if (r->dp[i - d->used] < t) {
8840
            r->dp[i + 1 - d->used]++;
8841
        }
8842
#else
8843
        /* Index of lowest digit trial is subtracted from. */
8844
0
        o = (sp_size_t)(i - d->used);
8845
0
        do {
8846
        #ifndef SQR_MUL_ASM
8847
            sp_int_word tw = 0;
8848
        #else
8849
0
            sp_int_digit tl = 0;
8850
0
            sp_int_digit th = 0;
8851
0
        #endif
8852
8853
            /* Multiply divisor by trial quotient. */
8854
0
            for (j = 0; j < d->used; j++) {
8855
            #ifndef SQR_MUL_ASM
8856
                tw += (sp_int_word)d->dp[j] * t;
8857
                trial->dp[j] = (sp_int_digit)tw;
8858
                tw >>= SP_WORD_SIZE;
8859
            #else
8860
0
                SP_ASM_MUL_ADD_NO(tl, th, d->dp[j], t);
8861
0
                trial->dp[j] = tl;
8862
0
                tl = th;
8863
0
                th = 0;
8864
0
            #endif
8865
0
            }
8866
          #ifndef SQR_MUL_ASM
8867
            trial->dp[j] = (sp_int_digit)tw;
8868
          #else
8869
0
            trial->dp[j] = tl;
8870
0
          #endif
8871
8872
            /* Check trial quotient isn't larger than dividend. */
8873
0
            for (j = d->used; j > 0; j--) {
8874
0
                if (trial->dp[j] != a->dp[j + o]) {
8875
0
                    break;
8876
0
                }
8877
0
            }
8878
            /* Decrement trial quotient if larger and try again. */
8879
0
            if (trial->dp[j] > a->dp[j + o]) {
8880
0
                t--;
8881
0
            }
8882
0
        }
8883
0
        while (trial->dp[j] > a->dp[j + o]);
8884
8885
    #ifndef SQR_MUL_ASM
8886
        sw = 0;
8887
    #else
8888
0
        sl = 0;
8889
0
        sh = 0;
8890
0
    #endif
8891
        /* Subtract trial - don't need to update used. */
8892
0
        for (j = 0; j <= d->used; j++) {
8893
        #ifndef SQR_MUL_ASM
8894
            sw += a->dp[j + o];
8895
            sw -= trial->dp[j];
8896
            a->dp[j + o] = (sp_int_digit)sw;
8897
            sw >>= SP_WORD_SIZE;
8898
        #else
8899
0
            st = a->dp[j + o];
8900
0
            SP_ASM_ADDC(sl, sh, st);
8901
0
            st = trial->dp[j];
8902
0
            SP_ASM_SUBB(sl, sh, st);
8903
0
            a->dp[j + o] = sl;
8904
0
            sl = sh;
8905
0
            sh = (sp_int_digit)0 - (sl >> (SP_WORD_SIZE - 1));
8906
0
        #endif
8907
0
        }
8908
8909
0
        r->dp[o] = t;
8910
0
#endif /* WOLFSSL_SP_SMALL */
8911
0
    }
8912
    /* Update used. */
8913
0
    a->used = (sp_size_t)(i + 1U);
8914
0
    if (a->used == d->used) {
8915
        /* Finish div now that length of dividend is same as divisor. */
8916
0
        _sp_div_same_size(a, d, r);
8917
0
    }
8918
8919
0
    return err;
8920
0
}
8921
8922
/* Divide a by d and return the quotient in r and the remainder in rem.
8923
 *   r = a / d; rem = a % d
8924
 *
8925
 * @param [in]  a     SP integer to be divided.
8926
 * @param [in]  d     SP integer to divide by.
8927
 * @param [out] r     SP integer that is the quotient. May be NULL.
8928
 * @param [out] rem   SP integer that is the remainder. May be NULL.
8929
 * @param [in]  used  Number of digits in temporaries to use.
8930
 *
8931
 * @return  MP_OKAY on success.
8932
 * @return  MP_MEM when dynamic memory allocation fails.
8933
 */
8934
static int _sp_div(const sp_int* a, const sp_int* d, sp_int* r, sp_int* rem,
8935
    unsigned int used)
8936
0
{
8937
0
    int err = MP_OKAY;
8938
0
    int ret;
8939
0
    int done = 0;
8940
0
    int s = 0;
8941
0
    sp_int* sa = NULL;
8942
0
    sp_int* sd = NULL;
8943
0
    sp_int* tr = NULL;
8944
0
    sp_int* trial = NULL;
8945
#ifdef WOLFSSL_SP_INT_NEGATIVE
8946
    sp_uint8 signA = MP_ZPOS;
8947
    sp_uint8 signD = MP_ZPOS;
8948
#endif /* WOLFSSL_SP_INT_NEGATIVE */
8949
    /* Intermediates will always be less than or equal to dividend. */
8950
0
    DECL_SP_INT_ARRAY(td, used, 4);
8951
8952
#ifdef WOLFSSL_SP_INT_NEGATIVE
8953
    /* Cache sign for results. */
8954
    signA = a->sign;
8955
    signD = d->sign;
8956
#endif /* WOLFSSL_SP_INT_NEGATIVE */
8957
8958
    /* Handle simple case of: dividend < divisor. */
8959
0
    ret = _sp_cmp_abs(a, d);
8960
0
    if (ret == MP_LT) {
8961
        /* a = 0 * d + a */
8962
0
        if ((rem != NULL) && (a != rem)) {
8963
0
            _sp_copy(a, rem);
8964
0
        }
8965
0
        if (r != NULL) {
8966
0
            _sp_set(r, 0);
8967
0
        }
8968
0
        done = 1;
8969
0
    }
8970
    /* Handle simple case of: dividend == divisor. */
8971
0
    else if (ret == MP_EQ) {
8972
        /* a = 1 * d + 0 */
8973
0
        if (rem != NULL) {
8974
0
            _sp_set(rem, 0);
8975
0
        }
8976
0
        if (r != NULL) {
8977
0
            _sp_set(r, 1);
8978
        #ifdef WOLFSSL_SP_INT_NEGATIVE
8979
            r->sign = (signA == signD) ? MP_ZPOS : MP_NEG;
8980
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
8981
0
        }
8982
0
        done = 1;
8983
0
    }
8984
0
    else if (sp_count_bits(a) == sp_count_bits(d)) {
8985
        /* a is greater than d but same bit length - subtract. */
8986
0
        if (rem != NULL) {
8987
0
            _sp_sub_off(a, d, rem, 0);
8988
        #ifdef WOLFSSL_SP_INT_NEGATIVE
8989
            rem->sign = signA;
8990
        #endif
8991
0
        }
8992
0
        if (r != NULL) {
8993
0
            _sp_set(r, 1);
8994
        #ifdef WOLFSSL_SP_INT_NEGATIVE
8995
            r->sign = (signA == signD) ? MP_ZPOS : MP_NEG;
8996
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
8997
0
        }
8998
0
        done = 1;
8999
0
    }
9000
9001
    /* Allocate temporary 'sp_int's and assign. */
9002
0
    if ((!done) && (err == MP_OKAY)) {
9003
    #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
9004
        !defined(WOLFSSL_SP_NO_MALLOC)
9005
        unsigned int cnt = 4;
9006
        /* Reuse remainder sp_int where possible. */
9007
        if ((rem != NULL) && (rem != d) && (rem->size > a->used)) {
9008
            sa = rem;
9009
            cnt--;
9010
        }
9011
        /* Reuse result sp_int where possible. */
9012
        if ((r != NULL) && (r != d)) {
9013
            tr = r;
9014
            cnt--;
9015
        }
9016
        /* Macro always has code associated with it and checks err first. */
9017
        ALLOC_SP_INT_ARRAY(td, used, cnt, err, NULL);
9018
    #else
9019
0
        ALLOC_SP_INT_ARRAY(td, used, 4, err, NULL);
9020
0
    #endif
9021
0
    }
9022
0
    if ((!done) && (err == MP_OKAY)) {
9023
    #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
9024
        !defined(WOLFSSL_SP_NO_MALLOC)
9025
        int i = 2;
9026
9027
        /* Set to temporary when not reusing. */
9028
        if (sa == NULL) {
9029
            sa = td[i++];
9030
            _sp_init_size(sa, used);
9031
        }
9032
        if (tr == NULL) {
9033
            tr = td[i];
9034
            _sp_init_size(tr, (unsigned int)(a->used - d->used + 2));
9035
        }
9036
    #else
9037
0
        sa    = td[2];
9038
0
        tr    = td[3];
9039
9040
0
        _sp_init_size(sa, used);
9041
0
        _sp_init_size(tr, (unsigned int)(a->used - d->used + 2));
9042
0
    #endif
9043
0
        sd    = td[0];
9044
0
        trial = td[1];
9045
9046
        /* Initialize sizes to minimal values. */
9047
0
        _sp_init_size(sd, (sp_size_t)(d->used + 1U));
9048
0
        _sp_init_size(trial, used);
9049
9050
        /* Move divisor to top of word. Adjust dividend as well. */
9051
0
        s = sp_count_bits(d);
9052
0
        s = SP_WORD_SIZE - (s & (int)SP_WORD_MASK);
9053
0
        _sp_copy(a, sa);
9054
        /* Only shift if top bit of divisor no set. */
9055
0
        if (s != SP_WORD_SIZE) {
9056
0
            err = sp_lshb(sa, s);
9057
0
            if (err == MP_OKAY) {
9058
0
                _sp_copy(d, sd);
9059
0
                d = sd;
9060
0
                err = sp_lshb(sd, s);
9061
0
            }
9062
0
        }
9063
0
    }
9064
0
    if ((!done) && (err == MP_OKAY) && (d->used > 0)) {
9065
        /* Do division: tr = sa / d, sa = sa % d. */
9066
0
        err = _sp_div_impl(sa, d, tr, trial);
9067
        /* Return the remainder if required. */
9068
0
        if ((err == MP_OKAY) && (rem != NULL)) {
9069
            /* Move result back down if moved up for divisor value. */
9070
0
            if (s != SP_WORD_SIZE) {
9071
0
                (void)sp_rshb(sa, s, sa);
9072
0
            }
9073
0
            _sp_copy(sa, rem);
9074
0
            sp_clamp(rem);
9075
        #ifdef WOLFSSL_SP_INT_NEGATIVE
9076
            rem->sign = (rem->used == 0) ? MP_ZPOS : signA;
9077
        #endif
9078
0
        }
9079
        /* Return the quotient if required. */
9080
0
        if ((err == MP_OKAY) && (r != NULL)) {
9081
0
            _sp_copy(tr, r);
9082
0
            sp_clamp(r);
9083
        #ifdef WOLFSSL_SP_INT_NEGATIVE
9084
            if ((r->used == 0) || (signA == signD)) {
9085
                r->sign = MP_ZPOS;
9086
            }
9087
            else {
9088
                r->sign = MP_NEG;
9089
            }
9090
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
9091
0
        }
9092
0
    }
9093
9094
0
    FREE_SP_INT_ARRAY(td, NULL);
9095
0
    return err;
9096
0
}
9097
9098
/* Divide a by d and return the quotient in r and the remainder in rem.
9099
 *   r = a / d; rem = a % d
9100
 *
9101
 * @param [in]  a    SP integer to be divided.
9102
 * @param [in]  d    SP integer to divide by.
9103
 * @param [out] r    SP integer that is the quotient. May be NULL.
9104
 * @param [out] rem  SP integer that is the remainder. May be NULL.
9105
 *
9106
 * @return  MP_OKAY on success.
9107
 * @return  MP_VAL when a or d is NULL, r and rem are NULL, or d is 0.
9108
 * @return  MP_MEM when dynamic memory allocation fails.
9109
 */
9110
int sp_div(const sp_int* a, const sp_int* d, sp_int* r, sp_int* rem)
9111
0
{
9112
0
    int err = MP_OKAY;
9113
0
    unsigned int used = 1;
9114
9115
    /* Validate parameters. */
9116
0
    if ((a == NULL) || (d == NULL) || ((r == NULL) && (rem == NULL))) {
9117
0
        err = MP_VAL;
9118
0
    }
9119
    /* a / 0 = infinity. */
9120
0
    if ((err == MP_OKAY) && sp_iszero(d)) {
9121
0
        err = MP_VAL;
9122
0
    }
9123
    /* Ensure quotient result has enough memory. */
9124
0
    if ((err == MP_OKAY) && (r != NULL) && (r->size + d->used < a->used + 2)) {
9125
0
        err = MP_VAL;
9126
0
    }
9127
0
    if ((err == MP_OKAY) && (rem != NULL)) {
9128
        /* Ensure remainder has enough memory. */
9129
0
        if ((a->used <= d->used) && (rem->size < a->used + 1)) {
9130
0
            err = MP_VAL;
9131
0
        }
9132
0
        else if ((a->used > d->used) && (rem->size < d->used + 1)) {
9133
0
            err = MP_VAL;
9134
0
        }
9135
0
    }
9136
0
    if (err == MP_OKAY) {
9137
0
        if (a->used == SP_INT_DIGITS) {
9138
            /* May need to shift number being divided left into a new word. */
9139
0
            int bits = SP_WORD_SIZE - (sp_count_bits(d) % SP_WORD_SIZE);
9140
0
            if ((bits != SP_WORD_SIZE) && (sp_count_bits(a) + bits >
9141
0
                    (int)(SP_INT_DIGITS * SP_WORD_SIZE))) {
9142
0
                err = MP_VAL;
9143
0
            }
9144
0
            else {
9145
0
                used = SP_INT_DIGITS;
9146
0
            }
9147
0
        }
9148
0
        else {
9149
0
            used = (sp_size_t)(a->used + 1U);
9150
0
        }
9151
0
    }
9152
9153
0
    if (err == MP_OKAY) {
9154
    #if 0
9155
        sp_print(a, "a");
9156
        sp_print(d, "b");
9157
    #endif
9158
        /* Do operation. */
9159
0
        err = _sp_div(a, d, r, rem, used);
9160
    #if 0
9161
        if (err == MP_OKAY) {
9162
            if (rem != NULL) {
9163
                sp_print(rem, "rdr");
9164
            }
9165
            if (r != NULL) {
9166
                sp_print(r, "rdw");
9167
            }
9168
        }
9169
    #endif
9170
0
    }
9171
9172
0
    return err;
9173
0
}
9174
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
9175
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
9176
9177
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
9178
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
9179
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
9180
#ifndef FREESCALE_LTC_TFM
9181
#ifdef WOLFSSL_SP_INT_NEGATIVE
9182
/* Calculate the remainder of dividing a by m: r = a mod m.
9183
 *
9184
 * Parameter r can be the same pointer as parameter m.
9185
 *
9186
 * @param [in]  a  SP integer to reduce.
9187
 * @param [in]  m  SP integer that is the modulus.
9188
 * @param [out] r  SP integer to store result in.
9189
 *
9190
 * @return  MP_OKAY on success.
9191
 * @return  MP_MEM when dynamic memory allocation fails.
9192
 */
9193
static int _sp_mod(const sp_int* a, const sp_int* m, sp_int* r)
9194
{
9195
    int err = MP_OKAY;
9196
    /* Remainder will start as a. */
9197
    DECL_SP_INT(t, (a == NULL) ? 1 : a->used + 1);
9198
9199
    /* In case remainder is modulus - allocate temporary. */
9200
    ALLOC_SP_INT(t, a->used + 1, err, NULL);
9201
    if (err == MP_OKAY) {
9202
        _sp_init_size(t, a->used + 1);
9203
        /* Use divide to calculate remainder and don't get quotient. */
9204
        err = sp_div(a, m, NULL, t);
9205
    }
9206
    if (err == MP_OKAY) {
9207
        /* Make remainder positive and copy into result. */
9208
        if ((!sp_iszero(t)) && (t->sign != m->sign)) {
9209
            err = sp_add(t, m, r);
9210
        }
9211
        else {
9212
            _sp_copy(t, r);
9213
        }
9214
    }
9215
    FREE_SP_INT(t, NULL);
9216
9217
    return err;
9218
}
9219
#endif
9220
9221
/* Calculate the remainder of dividing a by m: r = a mod m.
9222
 *
9223
 * @param [in]  a  SP integer to reduce.
9224
 * @param [in]  m  SP integer that is the modulus.
9225
 * @param [out] r  SP integer to store result in.
9226
 *
9227
 * @return  MP_OKAY on success.
9228
 * @return  MP_VAL when a, m or r is NULL or m is 0.
9229
 * @return  MP_MEM when dynamic memory allocation fails.
9230
 */
9231
int sp_mod(const sp_int* a, const sp_int* m, sp_int* r)
9232
0
{
9233
0
    int err = MP_OKAY;
9234
9235
    /* Validate parameters. */
9236
0
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
9237
0
        err = MP_VAL;
9238
0
    }
9239
    /* Ensure a isn't too big a number to operate on. */
9240
0
    else if (a->used >= SP_INT_DIGITS) {
9241
0
        err = MP_VAL;
9242
0
    }
9243
9244
0
#ifndef WOLFSSL_SP_INT_NEGATIVE
9245
0
    if (err == MP_OKAY) {
9246
        /* Use divide to calculate remainder and don't get quotient. */
9247
0
        err = sp_div(a, m, NULL, r);
9248
0
    }
9249
#else
9250
    if ((err == MP_OKAY) && (r != m)) {
9251
        err = sp_div(a, m, NULL, r);
9252
        if ((err == MP_OKAY) && (!sp_iszero(r)) && (r->sign != m->sign)) {
9253
            err = sp_add(r, m, r);
9254
        }
9255
    }
9256
    else if (err == MP_OKAY) {
9257
        err = _sp_mod(a, m, r);
9258
    }
9259
#endif /* WOLFSSL_SP_INT_NEGATIVE */
9260
9261
0
    return err;
9262
0
}
9263
#endif /* !FREESCALE_LTC_TFM */
9264
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
9265
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
9266
9267
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
9268
    defined(HAVE_ECC) || !defined(NO_RSA)
9269
9270
/* START SP_MUL implementations. */
9271
/* This code is generated.
9272
 * To generate:
9273
 *   cd scripts/sp/sp_int
9274
 *   ./gen.sh
9275
 * File sp_mul.c contains code.
9276
 */
9277
9278
#ifdef SQR_MUL_ASM
9279
/* Multiply a by b into r where a and b have same number of digits. r = a * b
9280
 *
9281
 * Optimized code for when number of digits in a and b are the same.
9282
 *
9283
 * @param [in]  a  SP integer to multiply.
9284
 * @param [in]  b  SP integer to multiply by.
9285
 * @param [out] r  SP integer to hold result.
9286
 *
9287
 * @return  MP_OKAY otherwise.
9288
 * @return  MP_MEM when dynamic memory allocation fails.
9289
 */
9290
static int _sp_mul_nxn(const sp_int* a, const sp_int* b, sp_int* r)
9291
0
{
9292
0
    int err = MP_OKAY;
9293
0
    unsigned int i;
9294
0
    int j;
9295
0
    unsigned int k;
9296
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9297
    sp_int_digit* t = NULL;
9298
#elif defined(WOLFSSL_SP_DYN_STACK)
9299
    sp_int_digit t[a->used];
9300
#else
9301
    sp_int_digit t[SP_INT_DIGITS / 2];
9302
#endif
9303
9304
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9305
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * a->used, NULL,
9306
        DYNAMIC_TYPE_BIGINT);
9307
    if (t == NULL) {
9308
        err = MP_MEM;
9309
    }
9310
#endif
9311
0
    if (err == MP_OKAY) {
9312
0
        sp_int_digit l;
9313
0
        sp_int_digit h;
9314
0
        sp_int_digit o;
9315
0
        const sp_int_digit* dp;
9316
9317
0
        h = 0;
9318
0
        l = 0;
9319
0
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
9320
0
        t[0] = h;
9321
0
        h = 0;
9322
0
        o = 0;
9323
0
        for (k = 1; k <= (unsigned int)a->used - 1; k++) {
9324
0
            j = (int)k;
9325
0
            dp = a->dp;
9326
0
            for (; j >= 0; dp++, j--) {
9327
0
                SP_ASM_MUL_ADD(l, h, o, dp[0], b->dp[j]);
9328
0
            }
9329
0
            t[k] = l;
9330
0
            l = h;
9331
0
            h = o;
9332
0
            o = 0;
9333
0
        }
9334
0
        for (; k <= ((unsigned int)a->used - 1) * 2; k++) {
9335
0
            i = k - (sp_size_t)(b->used - 1);
9336
0
            dp = &b->dp[b->used - 1];
9337
0
            for (; i < a->used; i++, dp--) {
9338
0
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], dp[0]);
9339
0
            }
9340
0
            r->dp[k] = l;
9341
0
            l = h;
9342
0
            h = o;
9343
0
            o = 0;
9344
0
        }
9345
0
        r->dp[k] = l;
9346
0
        XMEMCPY(r->dp, t, a->used * sizeof(sp_int_digit));
9347
0
        r->used = (sp_size_t)(k + 1);
9348
0
        sp_clamp(r);
9349
0
    }
9350
9351
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9352
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
9353
#endif
9354
0
    return err;
9355
0
}
9356
9357
/* Multiply a by b into r. r = a * b
9358
 *
9359
 * @param [in]  a  SP integer to multiply.
9360
 * @param [in]  b  SP integer to multiply by.
9361
 * @param [out] r  SP integer to hold result.
9362
 *
9363
 * @return  MP_OKAY otherwise.
9364
 * @return  MP_MEM when dynamic memory allocation fails.
9365
 */
9366
static int _sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
9367
0
{
9368
0
    int err = MP_OKAY;
9369
0
    sp_size_t i;
9370
0
    int j;
9371
0
    sp_size_t k;
9372
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9373
    sp_int_digit* t = NULL;
9374
#elif defined(WOLFSSL_SP_DYN_STACK)
9375
    sp_int_digit t[a->used + b->used];
9376
#else
9377
    sp_int_digit t[SP_INT_DIGITS];
9378
#endif
9379
9380
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9381
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) *
9382
        (size_t)(a->used + b->used), NULL, DYNAMIC_TYPE_BIGINT);
9383
    if (t == NULL) {
9384
        err = MP_MEM;
9385
    }
9386
#endif
9387
0
    if (err == MP_OKAY) {
9388
0
        sp_int_digit l;
9389
0
        sp_int_digit h;
9390
0
        sp_int_digit o;
9391
9392
0
        h = 0;
9393
0
        l = 0;
9394
0
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
9395
0
        t[0] = h;
9396
0
        h = 0;
9397
0
        o = 0;
9398
0
        for (k = 1; k <= (sp_size_t)(b->used - 1); k++) {
9399
0
            i = 0;
9400
0
            j = (int)k;
9401
0
            for (; (i < a->used) && (j >= 0); i++, j--) {
9402
0
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
9403
0
            }
9404
0
            t[k] = l;
9405
0
            l = h;
9406
0
            h = o;
9407
0
            o = 0;
9408
0
        }
9409
0
        for (; k <= (sp_size_t)((a->used - 1) + (b->used - 1)); k++) {
9410
0
            j = (int)(b->used - 1);
9411
0
            i = (sp_size_t)(k - (sp_size_t)j);
9412
0
            for (; (i < a->used) && (j >= 0); i++, j--) {
9413
0
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
9414
0
            }
9415
0
            t[k] = l;
9416
0
            l = h;
9417
0
            h = o;
9418
0
            o = 0;
9419
0
        }
9420
0
        t[k] = l;
9421
0
        r->used = (sp_size_t)(k + 1);
9422
0
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
9423
0
        sp_clamp(r);
9424
0
    }
9425
9426
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9427
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
9428
#endif
9429
0
    return err;
9430
0
}
9431
#else
9432
/* Multiply a by b into r. r = a * b
9433
 *
9434
 * @param [in]  a  SP integer to multiply.
9435
 * @param [in]  b  SP integer to multiply by.
9436
 * @param [out] r  SP integer to hold result.
9437
 *
9438
 * @return  MP_OKAY otherwise.
9439
 * @return  MP_MEM when dynamic memory allocation fails.
9440
 */
9441
static int _sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
9442
{
9443
    int err = MP_OKAY;
9444
    sp_size_t i;
9445
    int j;
9446
    sp_size_t k;
9447
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9448
    sp_int_digit* t = NULL;
9449
#elif defined(WOLFSSL_SP_DYN_STACK)
9450
    sp_int_digit t[a->used + b->used];
9451
#else
9452
    sp_int_digit t[SP_INT_DIGITS];
9453
#endif
9454
9455
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9456
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) *
9457
        (size_t)(a->used + b->used), NULL, DYNAMIC_TYPE_BIGINT);
9458
    if (t == NULL) {
9459
        err = MP_MEM;
9460
    }
9461
#endif
9462
    if (err == MP_OKAY) {
9463
        sp_int_word w;
9464
        sp_int_word l;
9465
        sp_int_word h;
9466
    #ifdef SP_WORD_OVERFLOW
9467
        sp_int_word o;
9468
    #endif
9469
9470
        w = (sp_int_word)a->dp[0] * b->dp[0];
9471
        t[0] = (sp_int_digit)w;
9472
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
9473
        h = 0;
9474
    #ifdef SP_WORD_OVERFLOW
9475
        o = 0;
9476
    #endif
9477
        for (k = 1; (int)k <= ((int)a->used - 1) + ((int)b->used - 1); k++) {
9478
            i = (sp_size_t)(k - (b->used - 1));
9479
            i &= (sp_size_t)(((unsigned int)i >> (sizeof(i) * 8 - 1)) - 1U);
9480
            j = (int)(k - i);
9481
            for (; (i < a->used) && (j >= 0); i++, j--) {
9482
                w = (sp_int_word)a->dp[i] * b->dp[j];
9483
                l += (sp_int_digit)w;
9484
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
9485
            #ifdef SP_WORD_OVERFLOW
9486
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
9487
                l &= SP_MASK;
9488
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
9489
                h &= SP_MASK;
9490
            #endif
9491
            }
9492
            t[k] = (sp_int_digit)l;
9493
            l >>= SP_WORD_SIZE;
9494
            l += (sp_int_digit)h;
9495
            h >>= SP_WORD_SIZE;
9496
        #ifdef SP_WORD_OVERFLOW
9497
            h += o & SP_MASK;
9498
            o >>= SP_WORD_SIZE;
9499
        #endif
9500
        }
9501
        t[k] = (sp_int_digit)l;
9502
        r->used = (sp_size_t)(k + 1);
9503
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
9504
        sp_clamp(r);
9505
    }
9506
9507
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9508
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
9509
#endif
9510
    return err;
9511
}
9512
#endif
9513
9514
#ifndef WOLFSSL_SP_SMALL
9515
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
9516
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
9517
#ifndef SQR_MUL_ASM
9518
/* Multiply a by b and store in r: r = a * b
9519
 *
9520
 * Long-hand implementation.
9521
 *
9522
 * @param [in]  a  SP integer to multiply.
9523
 * @param [in]  b  SP integer to multiply.
9524
 * @param [out] r  SP integer result.
9525
 *
9526
 * @return  MP_OKAY on success.
9527
 * @return  MP_MEM when dynamic memory allocation fails.
9528
 */
9529
static int _sp_mul_4(const sp_int* a, const sp_int* b, sp_int* r)
9530
{
9531
    int err = MP_OKAY;
9532
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9533
    sp_int_word* w = NULL;
9534
#else
9535
    sp_int_word w[16];
9536
#endif
9537
    const sp_int_digit* da = a->dp;
9538
    const sp_int_digit* db = b->dp;
9539
9540
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9541
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 16, NULL,
9542
        DYNAMIC_TYPE_BIGINT);
9543
    if (w == NULL) {
9544
        err = MP_MEM;
9545
    }
9546
#endif
9547
9548
    if (err == MP_OKAY) {
9549
        w[0] = (sp_int_word)da[0] * db[0];
9550
        w[1] = (sp_int_word)da[0] * db[1];
9551
        w[2] = (sp_int_word)da[1] * db[0];
9552
        w[3] = (sp_int_word)da[0] * db[2];
9553
        w[4] = (sp_int_word)da[1] * db[1];
9554
        w[5] = (sp_int_word)da[2] * db[0];
9555
        w[6] = (sp_int_word)da[0] * db[3];
9556
        w[7] = (sp_int_word)da[1] * db[2];
9557
        w[8] = (sp_int_word)da[2] * db[1];
9558
        w[9] = (sp_int_word)da[3] * db[0];
9559
        w[10] = (sp_int_word)da[1] * db[3];
9560
        w[11] = (sp_int_word)da[2] * db[2];
9561
        w[12] = (sp_int_word)da[3] * db[1];
9562
        w[13] = (sp_int_word)da[2] * db[3];
9563
        w[14] = (sp_int_word)da[3] * db[2];
9564
        w[15] = (sp_int_word)da[3] * db[3];
9565
9566
        r->dp[0] = (sp_int_digit)w[0];
9567
        w[0] >>= SP_WORD_SIZE;
9568
        w[0] += (sp_int_digit)w[1];
9569
        w[0] += (sp_int_digit)w[2];
9570
        r->dp[1] = (sp_int_digit)w[0];
9571
        w[0] >>= SP_WORD_SIZE;
9572
        w[1] >>= SP_WORD_SIZE;
9573
        w[0] += (sp_int_digit)w[1];
9574
        w[2] >>= SP_WORD_SIZE;
9575
        w[0] += (sp_int_digit)w[2];
9576
        w[0] += (sp_int_digit)w[3];
9577
        w[0] += (sp_int_digit)w[4];
9578
        w[0] += (sp_int_digit)w[5];
9579
        r->dp[2] = (sp_int_digit)w[0];
9580
        w[0] >>= SP_WORD_SIZE;
9581
        w[3] >>= SP_WORD_SIZE;
9582
        w[0] += (sp_int_digit)w[3];
9583
        w[4] >>= SP_WORD_SIZE;
9584
        w[0] += (sp_int_digit)w[4];
9585
        w[5] >>= SP_WORD_SIZE;
9586
        w[0] += (sp_int_digit)w[5];
9587
        w[0] += (sp_int_digit)w[6];
9588
        w[0] += (sp_int_digit)w[7];
9589
        w[0] += (sp_int_digit)w[8];
9590
        w[0] += (sp_int_digit)w[9];
9591
        r->dp[3] = (sp_int_digit)w[0];
9592
        w[0] >>= SP_WORD_SIZE;
9593
        w[6] >>= SP_WORD_SIZE;
9594
        w[0] += (sp_int_digit)w[6];
9595
        w[7] >>= SP_WORD_SIZE;
9596
        w[0] += (sp_int_digit)w[7];
9597
        w[8] >>= SP_WORD_SIZE;
9598
        w[0] += (sp_int_digit)w[8];
9599
        w[9] >>= SP_WORD_SIZE;
9600
        w[0] += (sp_int_digit)w[9];
9601
        w[0] += (sp_int_digit)w[10];
9602
        w[0] += (sp_int_digit)w[11];
9603
        w[0] += (sp_int_digit)w[12];
9604
        r->dp[4] = (sp_int_digit)w[0];
9605
        w[0] >>= SP_WORD_SIZE;
9606
        w[10] >>= SP_WORD_SIZE;
9607
        w[0] += (sp_int_digit)w[10];
9608
        w[11] >>= SP_WORD_SIZE;
9609
        w[0] += (sp_int_digit)w[11];
9610
        w[12] >>= SP_WORD_SIZE;
9611
        w[0] += (sp_int_digit)w[12];
9612
        w[0] += (sp_int_digit)w[13];
9613
        w[0] += (sp_int_digit)w[14];
9614
        r->dp[5] = (sp_int_digit)w[0];
9615
        w[0] >>= SP_WORD_SIZE;
9616
        w[13] >>= SP_WORD_SIZE;
9617
        w[0] += (sp_int_digit)w[13];
9618
        w[14] >>= SP_WORD_SIZE;
9619
        w[0] += (sp_int_digit)w[14];
9620
        w[0] += (sp_int_digit)w[15];
9621
        r->dp[6] = (sp_int_digit)w[0];
9622
        w[0] >>= SP_WORD_SIZE;
9623
        w[15] >>= SP_WORD_SIZE;
9624
        w[0] += (sp_int_digit)w[15];
9625
        r->dp[7] = (sp_int_digit)w[0];
9626
9627
        r->used = 8;
9628
        sp_clamp(r);
9629
    }
9630
9631
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9632
    XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
9633
#endif
9634
    return err;
9635
}
9636
#else /* SQR_MUL_ASM */
9637
/* Multiply a by b and store in r: r = a * b
9638
 *
9639
 * Comba implementation.
9640
 *
9641
 * @param [in]  a  SP integer to multiply.
9642
 * @param [in]  b  SP integer to multiply.
9643
 * @param [out] r  SP integer result.
9644
 *
9645
 * @return  MP_OKAY on success.
9646
 * @return  MP_MEM when dynamic memory allocation fails.
9647
 */
9648
static int _sp_mul_4(const sp_int* a, const sp_int* b, sp_int* r)
9649
0
{
9650
0
    sp_int_digit l = 0;
9651
0
    sp_int_digit h = 0;
9652
0
    sp_int_digit o = 0;
9653
0
    sp_int_digit t[4];
9654
9655
0
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
9656
0
    t[0] = h;
9657
0
    h = 0;
9658
0
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
9659
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
9660
0
    t[1] = l;
9661
0
    l = h;
9662
0
    h = o;
9663
0
    o = 0;
9664
0
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
9665
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
9666
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
9667
0
    t[2] = l;
9668
0
    l = h;
9669
0
    h = o;
9670
0
    o = 0;
9671
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
9672
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
9673
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
9674
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
9675
0
    t[3] = l;
9676
0
    l = h;
9677
0
    h = o;
9678
0
    o = 0;
9679
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
9680
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
9681
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
9682
0
    r->dp[4] = l;
9683
0
    l = h;
9684
0
    h = o;
9685
0
    o = 0;
9686
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
9687
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
9688
0
    r->dp[5] = l;
9689
0
    l = h;
9690
0
    h = o;
9691
0
    SP_ASM_MUL_ADD_NO(l, h, a->dp[3], b->dp[3]);
9692
0
    r->dp[6] = l;
9693
0
    r->dp[7] = h;
9694
0
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
9695
0
    r->used = 8;
9696
0
    sp_clamp(r);
9697
9698
0
    return MP_OKAY;
9699
0
}
9700
#endif /* SQR_MUL_ASM */
9701
#endif /* SP_WORD_SIZE == 64 */
9702
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
9703
#ifdef SQR_MUL_ASM
9704
/* Multiply a by b and store in r: r = a * b
9705
 *
9706
 * Comba implementation, fully unrolled for 6-digit operands: for each result
 * digit k, the products a->dp[i] * b->dp[j] with i + j == k are summed into
 * the l/h/o accumulator, l is stored as digit k and the accumulator is
 * shifted down one digit (l = h, h = o, o = 0).
9707
 *
 * Digits 0..5 of a and b are read and digits 0..11 of r are written. No
 * length, capacity or NULL checks are made in this function.
 * Result digits 0..5 are staged in a local array and only copied into r
 * after the last read of a and b - presumably so that r may be the same
 * object as a or b (TODO confirm with callers).
 *
9708
 * @param [in]  a  SP integer to multiply.
9709
 * @param [in]  b  SP integer to multiply.
9710
 * @param [out] r  SP integer result.
9711
 *
9712
 * @return  MP_OKAY always; there is no failure path in this function.
9713
 *          (No dynamic memory is allocated, so MP_MEM is never returned.)
9714
 */
9715
static int _sp_mul_6(const sp_int* a, const sp_int* b, sp_int* r)
9716
0
{
9717
0
    sp_int_digit l = 0;
9718
0
    sp_int_digit h = 0;
9719
0
    sp_int_digit o = 0;
9720
0
    sp_int_digit t[6]; /* Staging area for result digits 0..5. */
9721
9722
0
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]); /* Column 0: note (h, l) order. */
9723
0
    t[0] = h; /* h is digit 0; l carries on as the low word of column 1. */
9724
0
    h = 0;
9725
0
    /* Two-word form (no o) - presumably no carry out of h is possible here. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
9726
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
9727
0
    t[1] = l;
9728
0
    l = h; /* Shift accumulator down one digit for the next column. */
9729
0
    h = o;
9730
0
    o = 0;
9731
0
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
9732
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
9733
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
9734
0
    t[2] = l;
9735
0
    l = h;
9736
0
    h = o;
9737
0
    o = 0;
9738
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
9739
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
9740
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
9741
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
9742
0
    t[3] = l;
9743
0
    l = h;
9744
0
    h = o;
9745
0
    o = 0;
9746
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
9747
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
9748
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
9749
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
9750
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
9751
0
    t[4] = l;
9752
0
    l = h;
9753
0
    h = o;
9754
0
    o = 0;
9755
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
9756
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
9757
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
9758
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
9759
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
9760
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
9761
0
    t[5] = l;
9762
0
    l = h;
9763
0
    h = o;
9764
0
    o = 0;
9765
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
9766
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
9767
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
9768
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
9769
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
9770
0
    /* From here digits go straight to r; only dp[0..5] of a and b are read. */
    r->dp[6] = l;
9771
0
    l = h;
9772
0
    h = o;
9773
0
    o = 0;
9774
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
9775
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
9776
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
9777
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
9778
0
    r->dp[7] = l;
9779
0
    l = h;
9780
0
    h = o;
9781
0
    o = 0;
9782
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
9783
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
9784
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
9785
0
    r->dp[8] = l;
9786
0
    l = h;
9787
0
    h = o;
9788
0
    o = 0;
9789
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
9790
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
9791
0
    r->dp[9] = l;
9792
0
    l = h;
9793
0
    h = o;
9794
0
    /* Last column: a single product, the two accumulator words are the top digits. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[5], b->dp[5]);
9795
0
    r->dp[10] = l;
9796
0
    r->dp[11] = h;
9797
0
    /* All reads of a and b are done; copy the staged low digits into r. */
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
9798
0
    r->used = 12;
9799
0
    sp_clamp(r); /* Presumably trims leading zero digits from r->used. */
9800
9801
0
    return MP_OKAY;
9802
0
}
9803
#endif /* SQR_MUL_ASM */
9804
#endif /* SP_WORD_SIZE == 64 */
9805
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
9806
#ifdef SQR_MUL_ASM
9807
/* Multiply a by b and store in r: r = a * b
9808
 *
9809
 * Comba implementation, fully unrolled for 8-digit operands: for each result
 * digit k, the products a->dp[i] * b->dp[j] with i + j == k are summed into
 * the l/h/o accumulator, l is stored as digit k and the accumulator is
 * shifted down one digit (l = h, h = o, o = 0).
9810
 *
 * Digits 0..7 of a and b are read and digits 0..15 of r are written. No
 * length, capacity or NULL checks are made in this function.
 * Result digits 0..7 are staged in a local array and only copied into r
 * after the last read of a and b - presumably so that r may be the same
 * object as a or b (TODO confirm with callers).
 *
9811
 * @param [in]  a  SP integer to multiply.
9812
 * @param [in]  b  SP integer to multiply.
9813
 * @param [out] r  SP integer result.
9814
 *
9815
 * @return  MP_OKAY always; there is no failure path in this function.
9816
 *          (No dynamic memory is allocated, so MP_MEM is never returned.)
9817
 */
9818
static int _sp_mul_8(const sp_int* a, const sp_int* b, sp_int* r)
9819
{
9820
    sp_int_digit l = 0;
9821
    sp_int_digit h = 0;
9822
    sp_int_digit o = 0;
9823
    sp_int_digit t[8]; /* Staging area for result digits 0..7. */
9824
9825
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]); /* Column 0: note (h, l) order. */
9826
    t[0] = h; /* h is digit 0; l carries on as the low word of column 1. */
9827
    h = 0;
9828
    /* Two-word form (no o) - presumably no carry out of h is possible here. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
9829
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
9830
    t[1] = l;
9831
    l = h; /* Shift accumulator down one digit for the next column. */
9832
    h = o;
9833
    o = 0;
9834
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
9835
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
9836
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
9837
    t[2] = l;
9838
    l = h;
9839
    h = o;
9840
    o = 0;
9841
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
9842
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
9843
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
9844
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
9845
    t[3] = l;
9846
    l = h;
9847
    h = o;
9848
    o = 0;
9849
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
9850
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
9851
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
9852
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
9853
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
9854
    t[4] = l;
9855
    l = h;
9856
    h = o;
9857
    o = 0;
9858
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
9859
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
9860
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
9861
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
9862
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
9863
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
9864
    t[5] = l;
9865
    l = h;
9866
    h = o;
9867
    o = 0;
9868
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
9869
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
9870
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
9871
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
9872
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
9873
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
9874
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
9875
    t[6] = l;
9876
    l = h;
9877
    h = o;
9878
    o = 0;
9879
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
9880
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
9881
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
9882
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
9883
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
9884
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
9885
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
9886
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
9887
    t[7] = l;
9888
    l = h;
9889
    h = o;
9890
    o = 0;
9891
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
9892
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
9893
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
9894
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
9895
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
9896
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
9897
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
9898
    /* From here digits go straight to r; only dp[0..7] of a and b are read. */
    r->dp[8] = l;
9899
    l = h;
9900
    h = o;
9901
    o = 0;
9902
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
9903
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
9904
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
9905
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
9906
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
9907
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
9908
    r->dp[9] = l;
9909
    l = h;
9910
    h = o;
9911
    o = 0;
9912
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
9913
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
9914
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
9915
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
9916
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
9917
    r->dp[10] = l;
9918
    l = h;
9919
    h = o;
9920
    o = 0;
9921
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
9922
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
9923
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
9924
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
9925
    r->dp[11] = l;
9926
    l = h;
9927
    h = o;
9928
    o = 0;
9929
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
9930
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
9931
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
9932
    r->dp[12] = l;
9933
    l = h;
9934
    h = o;
9935
    o = 0;
9936
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
9937
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
9938
    r->dp[13] = l;
9939
    l = h;
9940
    h = o;
9941
    /* Last column: a single product, the two accumulator words are the top digits. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[7], b->dp[7]);
9942
    r->dp[14] = l;
9943
    r->dp[15] = h;
9944
    /* All reads of a and b are done; copy the staged low digits into r. */
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
9945
    r->used = 16;
9946
    sp_clamp(r); /* Presumably trims leading zero digits from r->used. */
9947
9948
    return MP_OKAY;
9949
}
9950
#endif /* SQR_MUL_ASM */
9951
#endif /* SP_WORD_SIZE == 32 */
9952
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
9953
#ifdef SQR_MUL_ASM
9954
/* Multiply a by b and store in r: r = a * b
9955
 *
9956
 * Comba implementation, fully unrolled for 12-digit operands: for each result
 * digit k, the products a->dp[i] * b->dp[j] with i + j == k are summed into
 * the l/h/o accumulator, l is stored as digit k and the accumulator is
 * shifted down one digit (l = h, h = o, o = 0).
9957
 *
 * Digits 0..11 of a and b are read and digits 0..23 of r are written. No
 * length, capacity or NULL checks are made in this function.
 * Result digits 0..11 are staged in a local array and only copied into r
 * after the last read of a and b - presumably so that r may be the same
 * object as a or b (TODO confirm with callers).
 *
9958
 * @param [in]  a  SP integer to multiply.
9959
 * @param [in]  b  SP integer to multiply.
9960
 * @param [out] r  SP integer result.
9961
 *
9962
 * @return  MP_OKAY always; there is no failure path in this function.
9963
 *          (No dynamic memory is allocated, so MP_MEM is never returned.)
9964
 */
9965
static int _sp_mul_12(const sp_int* a, const sp_int* b, sp_int* r)
9966
{
9967
    sp_int_digit l = 0;
9968
    sp_int_digit h = 0;
9969
    sp_int_digit o = 0;
9970
    sp_int_digit t[12]; /* Staging area for result digits 0..11. */
9971
9972
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]); /* Column 0: note (h, l) order. */
9973
    t[0] = h; /* h is digit 0; l carries on as the low word of column 1. */
9974
    h = 0;
9975
    /* Two-word form (no o) - presumably no carry out of h is possible here. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
9976
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
9977
    t[1] = l;
9978
    l = h; /* Shift accumulator down one digit for the next column. */
9979
    h = o;
9980
    o = 0;
9981
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
9982
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
9983
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
9984
    t[2] = l;
9985
    l = h;
9986
    h = o;
9987
    o = 0;
9988
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
9989
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
9990
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
9991
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
9992
    t[3] = l;
9993
    l = h;
9994
    h = o;
9995
    o = 0;
9996
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
9997
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
9998
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
9999
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
10000
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
10001
    t[4] = l;
10002
    l = h;
10003
    h = o;
10004
    o = 0;
10005
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
10006
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
10007
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
10008
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
10009
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
10010
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
10011
    t[5] = l;
10012
    l = h;
10013
    h = o;
10014
    o = 0;
10015
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
10016
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
10017
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
10018
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
10019
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
10020
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
10021
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
10022
    t[6] = l;
10023
    l = h;
10024
    h = o;
10025
    o = 0;
10026
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
10027
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
10028
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
10029
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
10030
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
10031
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
10032
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
10033
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
10034
    t[7] = l;
10035
    l = h;
10036
    h = o;
10037
    o = 0;
10038
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
10039
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
10040
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
10041
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
10042
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
10043
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
10044
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
10045
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
10046
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
10047
    t[8] = l;
10048
    l = h;
10049
    h = o;
10050
    o = 0;
10051
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
10052
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
10053
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
10054
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
10055
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
10056
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
10057
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
10058
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
10059
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
10060
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
10061
    t[9] = l;
10062
    l = h;
10063
    h = o;
10064
    o = 0;
10065
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
10066
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
10067
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
10068
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
10069
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
10070
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
10071
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
10072
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
10073
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
10074
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
10075
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
10076
    t[10] = l;
10077
    l = h;
10078
    h = o;
10079
    o = 0;
10080
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
10081
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
10082
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
10083
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
10084
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
10085
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
10086
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
10087
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
10088
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
10089
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
10090
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
10091
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
10092
    t[11] = l;
10093
    l = h;
10094
    h = o;
10095
    o = 0;
10096
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
10097
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
10098
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
10099
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
10100
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
10101
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
10102
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
10103
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
10104
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
10105
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
10106
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
10107
    /* From here digits go straight to r; only dp[0..11] of a and b are read. */
    r->dp[12] = l;
10108
    l = h;
10109
    h = o;
10110
    o = 0;
10111
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
10112
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
10113
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
10114
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
10115
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
10116
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
10117
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
10118
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
10119
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
10120
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
10121
    r->dp[13] = l;
10122
    l = h;
10123
    h = o;
10124
    o = 0;
10125
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
10126
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
10127
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
10128
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
10129
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
10130
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
10131
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
10132
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
10133
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
10134
    r->dp[14] = l;
10135
    l = h;
10136
    h = o;
10137
    o = 0;
10138
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
10139
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
10140
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
10141
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
10142
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
10143
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
10144
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
10145
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
10146
    r->dp[15] = l;
10147
    l = h;
10148
    h = o;
10149
    o = 0;
10150
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
10151
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
10152
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
10153
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
10154
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
10155
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
10156
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
10157
    r->dp[16] = l;
10158
    l = h;
10159
    h = o;
10160
    o = 0;
10161
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
10162
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
10163
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
10164
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
10165
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
10166
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
10167
    r->dp[17] = l;
10168
    l = h;
10169
    h = o;
10170
    o = 0;
10171
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
10172
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
10173
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
10174
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
10175
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
10176
    r->dp[18] = l;
10177
    l = h;
10178
    h = o;
10179
    o = 0;
10180
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
10181
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
10182
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
10183
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
10184
    r->dp[19] = l;
10185
    l = h;
10186
    h = o;
10187
    o = 0;
10188
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
10189
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
10190
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
10191
    r->dp[20] = l;
10192
    l = h;
10193
    h = o;
10194
    o = 0;
10195
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
10196
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
10197
    r->dp[21] = l;
10198
    l = h;
10199
    h = o;
10200
    /* Last column: a single product, the two accumulator words are the top digits. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[11], b->dp[11]);
10201
    r->dp[22] = l;
10202
    r->dp[23] = h;
10203
    /* All reads of a and b are done; copy the staged low digits into r. */
    XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
10204
    r->used = 24;
10205
    sp_clamp(r); /* Presumably trims leading zero digits from r->used. */
10206
10207
    return MP_OKAY;
10208
}
10209
#endif /* SQR_MUL_ASM */
10210
#endif /* SP_WORD_SIZE == 32 */
10211
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
10212
10213
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
10214
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
10215
    (SP_WORD_SIZE == 64)))
10216
    #if SP_INT_DIGITS >= 32
10217
/* Multiply a by b and store in r: r = a * b
10218
 *
10219
 * Comba implementation.
10220
 *
10221
 * @param [in]  a  SP integer to multiply.
10222
 * @param [in]  b  SP integer to multiply.
10223
 * @param [out] r  SP integer result.
10224
 *
10225
 * @return  MP_OKAY on success.
10226
 * @return  MP_MEM when dynamic memory allocation fails.
10227
 */
10228
static int _sp_mul_16(const sp_int* a, const sp_int* b, sp_int* r)
10229
{
10230
    int err = MP_OKAY;
10231
    sp_int_digit l = 0;
10232
    sp_int_digit h = 0;
10233
    sp_int_digit o = 0;
10234
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10235
    sp_int_digit* t = NULL;
10236
#else
10237
    sp_int_digit t[16];
10238
#endif
10239
10240
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10241
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
10242
         DYNAMIC_TYPE_BIGINT);
10243
     if (t == NULL) {
10244
         err = MP_MEM;
10245
     }
10246
#endif
10247
    if (err == MP_OKAY) {
10248
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
10249
        t[0] = h;
10250
        h = 0;
10251
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
10252
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
10253
        t[1] = l;
10254
        l = h;
10255
        h = o;
10256
        o = 0;
10257
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
10258
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
10259
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
10260
        t[2] = l;
10261
        l = h;
10262
        h = o;
10263
        o = 0;
10264
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
10265
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
10266
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
10267
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
10268
        t[3] = l;
10269
        l = h;
10270
        h = o;
10271
        o = 0;
10272
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
10273
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
10274
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
10275
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
10276
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
10277
        t[4] = l;
10278
        l = h;
10279
        h = o;
10280
        o = 0;
10281
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
10282
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
10283
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
10284
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
10285
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
10286
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
10287
        t[5] = l;
10288
        l = h;
10289
        h = o;
10290
        o = 0;
10291
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
10292
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
10293
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
10294
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
10295
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
10296
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
10297
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
10298
        t[6] = l;
10299
        l = h;
10300
        h = o;
10301
        o = 0;
10302
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
10303
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
10304
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
10305
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
10306
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
10307
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
10308
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
10309
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
10310
        t[7] = l;
10311
        l = h;
10312
        h = o;
10313
        o = 0;
10314
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
10315
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
10316
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
10317
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
10318
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
10319
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
10320
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
10321
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
10322
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
10323
        t[8] = l;
10324
        l = h;
10325
        h = o;
10326
        o = 0;
10327
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
10328
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
10329
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
10330
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
10331
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
10332
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
10333
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
10334
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
10335
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
10336
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
10337
        t[9] = l;
10338
        l = h;
10339
        h = o;
10340
        o = 0;
10341
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
10342
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
10343
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
10344
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
10345
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
10346
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
10347
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
10348
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
10349
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
10350
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
10351
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
10352
        t[10] = l;
10353
        l = h;
10354
        h = o;
10355
        o = 0;
10356
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
10357
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
10358
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
10359
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
10360
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
10361
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
10362
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
10363
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
10364
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
10365
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
10366
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
10367
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
10368
        t[11] = l;
10369
        l = h;
10370
        h = o;
10371
        o = 0;
10372
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
10373
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
10374
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
10375
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
10376
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
10377
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
10378
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
10379
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
10380
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
10381
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
10382
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
10383
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
10384
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
10385
        t[12] = l;
10386
        l = h;
10387
        h = o;
10388
        o = 0;
10389
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
10390
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
10391
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
10392
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
10393
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
10394
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
10395
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
10396
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
10397
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
10398
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
10399
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
10400
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
10401
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
10402
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
10403
        t[13] = l;
10404
        l = h;
10405
        h = o;
10406
        o = 0;
10407
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
10408
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
10409
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
10410
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
10411
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
10412
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
10413
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
10414
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
10415
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
10416
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
10417
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
10418
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
10419
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
10420
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
10421
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
10422
        t[14] = l;
10423
        l = h;
10424
        h = o;
10425
        o = 0;
10426
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
10427
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
10428
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
10429
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
10430
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
10431
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
10432
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
10433
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
10434
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
10435
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
10436
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
10437
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
10438
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
10439
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
10440
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
10441
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
10442
        t[15] = l;
10443
        l = h;
10444
        h = o;
10445
        o = 0;
10446
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
10447
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
10448
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
10449
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
10450
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
10451
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
10452
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
10453
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
10454
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
10455
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
10456
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
10457
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
10458
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
10459
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
10460
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
10461
        r->dp[16] = l;
10462
        l = h;
10463
        h = o;
10464
        o = 0;
10465
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
10466
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
10467
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
10468
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
10469
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
10470
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
10471
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
10472
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
10473
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
10474
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
10475
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
10476
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
10477
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
10478
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
10479
        r->dp[17] = l;
10480
        l = h;
10481
        h = o;
10482
        o = 0;
10483
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
10484
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
10485
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
10486
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
10487
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
10488
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
10489
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
10490
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
10491
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
10492
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
10493
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
10494
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
10495
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
10496
        r->dp[18] = l;
10497
        l = h;
10498
        h = o;
10499
        o = 0;
10500
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
10501
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
10502
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
10503
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
10504
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
10505
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
10506
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
10507
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
10508
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
10509
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
10510
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
10511
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
10512
        r->dp[19] = l;
10513
        l = h;
10514
        h = o;
10515
        o = 0;
10516
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
10517
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
10518
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
10519
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
10520
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
10521
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
10522
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
10523
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
10524
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
10525
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
10526
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
10527
        r->dp[20] = l;
10528
        l = h;
10529
        h = o;
10530
        o = 0;
10531
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
10532
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
10533
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
10534
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
10535
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
10536
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
10537
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
10538
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
10539
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
10540
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
10541
        r->dp[21] = l;
10542
        l = h;
10543
        h = o;
10544
        o = 0;
10545
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
10546
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
10547
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
10548
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
10549
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
10550
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
10551
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
10552
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
10553
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
10554
        r->dp[22] = l;
10555
        l = h;
10556
        h = o;
10557
        o = 0;
10558
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
10559
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
10560
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
10561
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
10562
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
10563
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
10564
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
10565
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
10566
        r->dp[23] = l;
10567
        l = h;
10568
        h = o;
10569
        o = 0;
10570
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
10571
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
10572
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
10573
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
10574
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
10575
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
10576
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
10577
        r->dp[24] = l;
10578
        l = h;
10579
        h = o;
10580
        o = 0;
10581
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
10582
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
10583
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
10584
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
10585
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
10586
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
10587
        r->dp[25] = l;
10588
        l = h;
10589
        h = o;
10590
        o = 0;
10591
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
10592
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
10593
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
10594
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
10595
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
10596
        r->dp[26] = l;
10597
        l = h;
10598
        h = o;
10599
        o = 0;
10600
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
10601
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
10602
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
10603
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
10604
        r->dp[27] = l;
10605
        l = h;
10606
        h = o;
10607
        o = 0;
10608
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
10609
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
10610
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
10611
        r->dp[28] = l;
10612
        l = h;
10613
        h = o;
10614
        o = 0;
10615
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
10616
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
10617
        r->dp[29] = l;
10618
        l = h;
10619
        h = o;
10620
        SP_ASM_MUL_ADD_NO(l, h, a->dp[15], b->dp[15]);
10621
        r->dp[30] = l;
10622
        r->dp[31] = h;
10623
        XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
10624
        r->used = 32;
10625
        sp_clamp(r);
10626
    }
10627
10628
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10629
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
10630
#endif
10631
    return err;
10632
}
10633
    #endif /* SP_INT_DIGITS >= 32 */
10634
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
10635
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64) */
10636
10637
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
10638
    #if SP_INT_DIGITS >= 48
10639
/* Multiply a by b and store in r: r = a * b
10640
 *
10641
 * Comba implementation.
10642
 *
10643
 * @param [in]  a  SP integer to multiply.
10644
 * @param [in]  b  SP integer to multiply.
10645
 * @param [out] r  SP integer result.
10646
 *
10647
 * @return  MP_OKAY on success.
10648
 * @return  MP_MEM when dynamic memory allocation fails.
10649
 */
10650
static int _sp_mul_24(const sp_int* a, const sp_int* b, sp_int* r)
10651
{
10652
    int err = MP_OKAY;
10653
    sp_int_digit l = 0;
10654
    sp_int_digit h = 0;
10655
    sp_int_digit o = 0;
10656
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10657
    sp_int_digit* t = NULL;
10658
#else
10659
    sp_int_digit t[24];
10660
#endif
10661
10662
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10663
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
10664
         DYNAMIC_TYPE_BIGINT);
10665
     if (t == NULL) {
10666
         err = MP_MEM;
10667
     }
10668
#endif
10669
    if (err == MP_OKAY) {
10670
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
10671
        t[0] = h;
10672
        h = 0;
10673
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
10674
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
10675
        t[1] = l;
10676
        l = h;
10677
        h = o;
10678
        o = 0;
10679
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
10680
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
10681
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
10682
        t[2] = l;
10683
        l = h;
10684
        h = o;
10685
        o = 0;
10686
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
10687
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
10688
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
10689
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
10690
        t[3] = l;
10691
        l = h;
10692
        h = o;
10693
        o = 0;
10694
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
10695
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
10696
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
10697
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
10698
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
10699
        t[4] = l;
10700
        l = h;
10701
        h = o;
10702
        o = 0;
10703
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
10704
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
10705
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
10706
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
10707
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
10708
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
10709
        t[5] = l;
10710
        l = h;
10711
        h = o;
10712
        o = 0;
10713
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
10714
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
10715
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
10716
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
10717
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
10718
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
10719
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
10720
        t[6] = l;
10721
        l = h;
10722
        h = o;
10723
        o = 0;
10724
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
10725
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
10726
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
10727
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
10728
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
10729
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
10730
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
10731
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
10732
        t[7] = l;
10733
        l = h;
10734
        h = o;
10735
        o = 0;
10736
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
10737
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
10738
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
10739
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
10740
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
10741
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
10742
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
10743
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
10744
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
10745
        t[8] = l;
10746
        l = h;
10747
        h = o;
10748
        o = 0;
10749
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
10750
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
10751
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
10752
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
10753
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
10754
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
10755
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
10756
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
10757
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
10758
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
10759
        t[9] = l;
10760
        l = h;
10761
        h = o;
10762
        o = 0;
10763
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
10764
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
10765
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
10766
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
10767
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
10768
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
10769
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
10770
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
10771
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
10772
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
10773
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
10774
        t[10] = l;
10775
        l = h;
10776
        h = o;
10777
        o = 0;
10778
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
10779
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
10780
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
10781
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
10782
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
10783
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
10784
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
10785
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
10786
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
10787
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
10788
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
10789
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
10790
        t[11] = l;
10791
        l = h;
10792
        h = o;
10793
        o = 0;
10794
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
10795
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
10796
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
10797
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
10798
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
10799
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
10800
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
10801
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
10802
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
10803
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
10804
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
10805
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
10806
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
10807
        t[12] = l;
10808
        l = h;
10809
        h = o;
10810
        o = 0;
10811
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
10812
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
10813
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
10814
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
10815
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
10816
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
10817
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
10818
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
10819
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
10820
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
10821
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
10822
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
10823
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
10824
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
10825
        t[13] = l;
10826
        l = h;
10827
        h = o;
10828
        o = 0;
10829
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
10830
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
10831
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
10832
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
10833
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
10834
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
10835
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
10836
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
10837
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
10838
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
10839
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
10840
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
10841
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
10842
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
10843
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
10844
        t[14] = l;
10845
        l = h;
10846
        h = o;
10847
        o = 0;
10848
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
10849
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
10850
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
10851
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
10852
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
10853
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
10854
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
10855
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
10856
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
10857
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
10858
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
10859
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
10860
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
10861
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
10862
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
10863
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
10864
        t[15] = l;
10865
        l = h;
10866
        h = o;
10867
        o = 0;
10868
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[16]);
10869
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
10870
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
10871
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
10872
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
10873
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
10874
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
10875
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
10876
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
10877
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
10878
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
10879
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
10880
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
10881
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
10882
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
10883
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
10884
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[0]);
10885
        t[16] = l;
10886
        l = h;
10887
        h = o;
10888
        o = 0;
10889
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[17]);
10890
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[16]);
10891
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
10892
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
10893
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
10894
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
10895
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
10896
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
10897
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
10898
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
10899
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
10900
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
10901
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
10902
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
10903
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
10904
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
10905
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[1]);
10906
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[0]);
10907
        t[17] = l;
10908
        l = h;
10909
        h = o;
10910
        o = 0;
10911
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[18]);
10912
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[17]);
10913
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[16]);
10914
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
10915
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
10916
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
10917
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
10918
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
10919
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
10920
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
10921
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
10922
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
10923
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
10924
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
10925
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
10926
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
10927
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[2]);
10928
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[1]);
10929
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[0]);
10930
        t[18] = l;
10931
        l = h;
10932
        h = o;
10933
        o = 0;
10934
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[19]);
10935
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[18]);
10936
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[17]);
10937
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[16]);
10938
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
10939
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
10940
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
10941
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
10942
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
10943
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
10944
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
10945
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
10946
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
10947
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
10948
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
10949
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
10950
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[3]);
10951
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[2]);
10952
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[1]);
10953
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[0]);
10954
        t[19] = l;
10955
        l = h;
10956
        h = o;
10957
        o = 0;
10958
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[20]);
10959
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[19]);
10960
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[18]);
10961
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[17]);
10962
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[16]);
10963
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
10964
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
10965
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
10966
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
10967
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
10968
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
10969
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
10970
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
10971
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
10972
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
10973
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
10974
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[4]);
10975
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[3]);
10976
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[2]);
10977
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[1]);
10978
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[0]);
10979
        t[20] = l;
10980
        l = h;
10981
        h = o;
10982
        o = 0;
10983
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[21]);
10984
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[20]);
10985
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[19]);
10986
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[18]);
10987
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[17]);
10988
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[16]);
10989
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
10990
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
10991
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
10992
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
10993
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
10994
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
10995
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
10996
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
10997
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
10998
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
10999
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[5]);
11000
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[4]);
11001
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[3]);
11002
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[2]);
11003
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[1]);
11004
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[0]);
11005
        t[21] = l;
11006
        l = h;
11007
        h = o;
11008
        o = 0;
11009
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[22]);
11010
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[21]);
11011
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[20]);
11012
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[19]);
11013
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[18]);
11014
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[17]);
11015
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[16]);
11016
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
11017
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
11018
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
11019
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
11020
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
11021
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
11022
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
11023
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
11024
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
11025
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[6]);
11026
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[5]);
11027
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[4]);
11028
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[3]);
11029
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[2]);
11030
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[1]);
11031
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[0]);
11032
        t[22] = l;
11033
        l = h;
11034
        h = o;
11035
        o = 0;
11036
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[23]);
11037
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[22]);
11038
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[21]);
11039
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[20]);
11040
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[19]);
11041
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[18]);
11042
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[17]);
11043
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[16]);
11044
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
11045
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
11046
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
11047
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
11048
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
11049
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
11050
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
11051
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
11052
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[7]);
11053
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[6]);
11054
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[5]);
11055
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[4]);
11056
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[3]);
11057
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[2]);
11058
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[1]);
11059
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[0]);
11060
        t[23] = l;
11061
        l = h;
11062
        h = o;
11063
        o = 0;
11064
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[23]);
11065
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[22]);
11066
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[21]);
11067
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[20]);
11068
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[19]);
11069
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[18]);
11070
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[17]);
11071
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[16]);
11072
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
11073
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
11074
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
11075
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
11076
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
11077
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
11078
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
11079
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[8]);
11080
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[7]);
11081
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[6]);
11082
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[5]);
11083
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[4]);
11084
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[3]);
11085
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[2]);
11086
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[1]);
11087
        r->dp[24] = l;
11088
        l = h;
11089
        h = o;
11090
        o = 0;
11091
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[23]);
11092
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[22]);
11093
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[21]);
11094
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[20]);
11095
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[19]);
11096
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[18]);
11097
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[17]);
11098
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[16]);
11099
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
11100
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
11101
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
11102
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
11103
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
11104
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
11105
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[9]);
11106
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[8]);
11107
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[7]);
11108
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[6]);
11109
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[5]);
11110
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[4]);
11111
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[3]);
11112
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[2]);
11113
        r->dp[25] = l;
11114
        l = h;
11115
        h = o;
11116
        o = 0;
11117
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[23]);
11118
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[22]);
11119
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[21]);
11120
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[20]);
11121
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[19]);
11122
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[18]);
11123
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[17]);
11124
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[16]);
11125
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
11126
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
11127
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
11128
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
11129
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
11130
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[10]);
11131
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[9]);
11132
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[8]);
11133
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[7]);
11134
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[6]);
11135
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[5]);
11136
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[4]);
11137
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[3]);
11138
        r->dp[26] = l;
11139
        l = h;
11140
        h = o;
11141
        o = 0;
11142
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[23]);
11143
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[22]);
11144
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[21]);
11145
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[20]);
11146
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[19]);
11147
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[18]);
11148
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[17]);
11149
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[16]);
11150
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
11151
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
11152
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
11153
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
11154
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[11]);
11155
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[10]);
11156
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[9]);
11157
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[8]);
11158
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[7]);
11159
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[6]);
11160
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[5]);
11161
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[4]);
11162
        r->dp[27] = l;
11163
        l = h;
11164
        h = o;
11165
        o = 0;
11166
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[23]);
11167
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[22]);
11168
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[21]);
11169
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[20]);
11170
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[19]);
11171
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[18]);
11172
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[17]);
11173
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[16]);
11174
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
11175
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
11176
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
11177
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[12]);
11178
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[11]);
11179
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[10]);
11180
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[9]);
11181
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[8]);
11182
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[7]);
11183
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[6]);
11184
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[5]);
11185
        r->dp[28] = l;
11186
        l = h;
11187
        h = o;
11188
        o = 0;
11189
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[23]);
11190
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[22]);
11191
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[21]);
11192
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[20]);
11193
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[19]);
11194
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[18]);
11195
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[17]);
11196
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[16]);
11197
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
11198
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
11199
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[13]);
11200
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[12]);
11201
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[11]);
11202
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[10]);
11203
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[9]);
11204
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[8]);
11205
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[7]);
11206
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[6]);
11207
        r->dp[29] = l;
11208
        l = h;
11209
        h = o;
11210
        o = 0;
11211
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[23]);
11212
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[22]);
11213
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[21]);
11214
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[20]);
11215
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[19]);
11216
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[18]);
11217
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[17]);
11218
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[16]);
11219
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[15]);
11220
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[14]);
11221
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[13]);
11222
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[12]);
11223
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[11]);
11224
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[10]);
11225
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[9]);
11226
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[8]);
11227
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[7]);
11228
        r->dp[30] = l;
11229
        l = h;
11230
        h = o;
11231
        o = 0;
11232
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[23]);
11233
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[22]);
11234
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[21]);
11235
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[20]);
11236
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[19]);
11237
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[18]);
11238
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[17]);
11239
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[16]);
11240
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[15]);
11241
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[14]);
11242
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[13]);
11243
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[12]);
11244
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[11]);
11245
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[10]);
11246
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[9]);
11247
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[8]);
11248
        r->dp[31] = l;
11249
        l = h;
11250
        h = o;
11251
        o = 0;
11252
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[23]);
11253
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[22]);
11254
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[21]);
11255
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[20]);
11256
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[19]);
11257
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[18]);
11258
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[17]);
11259
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[16]);
11260
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[15]);
11261
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[14]);
11262
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[13]);
11263
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[12]);
11264
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[11]);
11265
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[10]);
11266
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[9]);
11267
        r->dp[32] = l;
11268
        l = h;
11269
        h = o;
11270
        o = 0;
11271
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[23]);
11272
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[22]);
11273
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[21]);
11274
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[20]);
11275
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[19]);
11276
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[18]);
11277
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[17]);
11278
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[16]);
11279
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[15]);
11280
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[14]);
11281
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[13]);
11282
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[12]);
11283
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[11]);
11284
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[10]);
11285
        r->dp[33] = l;
11286
        l = h;
11287
        h = o;
11288
        o = 0;
11289
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[23]);
11290
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[22]);
11291
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[21]);
11292
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[20]);
11293
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[19]);
11294
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[18]);
11295
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[17]);
11296
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[16]);
11297
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[15]);
11298
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[14]);
11299
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[13]);
11300
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[12]);
11301
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[11]);
11302
        r->dp[34] = l;
11303
        l = h;
11304
        h = o;
11305
        o = 0;
11306
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[23]);
11307
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[22]);
11308
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[21]);
11309
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[20]);
11310
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[19]);
11311
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[18]);
11312
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[17]);
11313
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[16]);
11314
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[15]);
11315
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[14]);
11316
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[13]);
11317
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[12]);
11318
        r->dp[35] = l;
11319
        l = h;
11320
        h = o;
11321
        o = 0;
11322
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[23]);
11323
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[22]);
11324
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[21]);
11325
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[20]);
11326
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[19]);
11327
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[18]);
11328
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[17]);
11329
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[16]);
11330
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[15]);
11331
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[14]);
11332
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[13]);
11333
        r->dp[36] = l;
11334
        l = h;
11335
        h = o;
11336
        o = 0;
11337
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[23]);
11338
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[22]);
11339
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[21]);
11340
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[20]);
11341
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[19]);
11342
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[18]);
11343
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[17]);
11344
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[16]);
11345
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[15]);
11346
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[14]);
11347
        r->dp[37] = l;
11348
        l = h;
11349
        h = o;
11350
        o = 0;
11351
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[23]);
11352
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[22]);
11353
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[21]);
11354
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[20]);
11355
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[19]);
11356
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[18]);
11357
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[17]);
11358
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[16]);
11359
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[15]);
11360
        r->dp[38] = l;
11361
        l = h;
11362
        h = o;
11363
        o = 0;
11364
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[23]);
11365
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[22]);
11366
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[21]);
11367
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[20]);
11368
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[19]);
11369
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[18]);
11370
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[17]);
11371
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[16]);
11372
        r->dp[39] = l;
11373
        l = h;
11374
        h = o;
11375
        o = 0;
11376
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[23]);
11377
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[22]);
11378
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[21]);
11379
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[20]);
11380
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[19]);
11381
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[18]);
11382
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[17]);
11383
        r->dp[40] = l;
11384
        l = h;
11385
        h = o;
11386
        o = 0;
11387
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[23]);
11388
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[22]);
11389
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[21]);
11390
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[20]);
11391
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[19]);
11392
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[18]);
11393
        r->dp[41] = l;
11394
        l = h;
11395
        h = o;
11396
        o = 0;
11397
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[23]);
11398
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[22]);
11399
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[21]);
11400
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[20]);
11401
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[19]);
11402
        r->dp[42] = l;
11403
        l = h;
11404
        h = o;
11405
        o = 0;
11406
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[23]);
11407
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[22]);
11408
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[21]);
11409
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[20]);
11410
        r->dp[43] = l;
11411
        l = h;
11412
        h = o;
11413
        o = 0;
11414
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[23]);
11415
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[22]);
11416
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[21]);
11417
        r->dp[44] = l;
11418
        l = h;
11419
        h = o;
11420
        o = 0;
11421
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[23]);
11422
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[22]);
11423
        r->dp[45] = l;
11424
        l = h;
11425
        h = o;
11426
        SP_ASM_MUL_ADD_NO(l, h, a->dp[23], b->dp[23]);
11427
        r->dp[46] = l;
11428
        r->dp[47] = h;
11429
        XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
11430
        r->used = 48;
11431
        sp_clamp(r);
11432
    }
11433
11434
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
11435
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
11436
#endif
11437
    return err;
11438
}
11439
    #endif /* SP_INT_DIGITS >= 48 */
11440
11441
    #if SP_INT_DIGITS >= 64
11442
/* Multiply a by b and store in r: r = a * b
11443
 *
11444
 * Karatsuba implementation.
11445
 *
11446
 * @param [in]  a  SP integer to multiply.
11447
 * @param [in]  b  SP integer to multiply.
11448
 * @param [out] r  SP integer result.
11449
 *
11450
 * @return  MP_OKAY on success.
11451
 * @return  MP_MEM when dynamic memory allocation fails.
11452
 */
11453
static int _sp_mul_32(const sp_int* a, const sp_int* b, sp_int* r)
11454
{
11455
    int err = MP_OKAY;
11456
    unsigned int i;
11457
    sp_int_digit l;
11458
    sp_int_digit h;
11459
    sp_int* a1;
11460
    sp_int* b1;
11461
    sp_int* z0;
11462
    sp_int* z1;
11463
    sp_int* z2;
11464
    sp_int_digit ca;
11465
    sp_int_digit cb;
11466
    DECL_SP_INT_ARRAY(t, 16, 2);
11467
    DECL_SP_INT_ARRAY(z, 33, 2);
11468
11469
    ALLOC_SP_INT_ARRAY(t, 16, 2, err, NULL);
11470
    ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
11471
    if (err == MP_OKAY) {
11472
        a1 = t[0];
11473
        b1 = t[1];
11474
        z1 = z[0];
11475
        z2 = z[1];
11476
        z0 = r;
11477
11478
        XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
11479
        a1->used = 16;
11480
        XMEMCPY(b1->dp, &b->dp[16], sizeof(sp_int_digit) * 16);
11481
        b1->used = 16;
11482
11483
        /* z2 = a1 * b1 */
11484
        err = _sp_mul_16(a1, b1, z2);
11485
    }
11486
    if (err == MP_OKAY) {
11487
        l = a1->dp[0];
11488
        h = 0;
11489
        SP_ASM_ADDC(l, h, a->dp[0]);
11490
        a1->dp[0] = l;
11491
        l = h;
11492
        h = 0;
11493
        for (i = 1; i < 16; i++) {
11494
            SP_ASM_ADDC(l, h, a1->dp[i]);
11495
            SP_ASM_ADDC(l, h, a->dp[i]);
11496
            a1->dp[i] = l;
11497
            l = h;
11498
            h = 0;
11499
        }
11500
        ca = l;
11501
        /* b01 = b0 + b1 */
11502
        l = b1->dp[0];
11503
        h = 0;
11504
        SP_ASM_ADDC(l, h, b->dp[0]);
11505
        b1->dp[0] = l;
11506
        l = h;
11507
        h = 0;
11508
        for (i = 1; i < 16; i++) {
11509
            SP_ASM_ADDC(l, h, b1->dp[i]);
11510
            SP_ASM_ADDC(l, h, b->dp[i]);
11511
            b1->dp[i] = l;
11512
            l = h;
11513
            h = 0;
11514
        }
11515
        cb = l;
11516
11517
        /* z0 = a0 * b0 */
11518
        err = _sp_mul_16(a, b, z0);
11519
    }
11520
    if (err == MP_OKAY) {
11521
        /* z1 = (a0 + a1) * (b0 + b1) */
11522
        err = _sp_mul_16(a1, b1, z1);
11523
    }
11524
    if (err == MP_OKAY) {
11525
        /* r = (z2 << 32) + (z1 - z0 - z2) << 16) + z0 */
11526
        /* r = z0 */
11527
        /* r += (z1 - z0 - z2) << 16 */
11528
        z1->dp[32] = ca & cb;
11529
        l = 0;
11530
        if (ca) {
11531
            h = 0;
11532
            for (i = 0; i < 16; i++) {
11533
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
11534
                SP_ASM_ADDC(l, h, b1->dp[i]);
11535
                z1->dp[i + 16] = l;
11536
                l = h;
11537
                h = 0;
11538
            }
11539
        }
11540
        z1->dp[32] += l;
11541
        l = 0;
11542
        if (cb) {
11543
            h = 0;
11544
            for (i = 0; i < 16; i++) {
11545
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
11546
                SP_ASM_ADDC(l, h, a1->dp[i]);
11547
                z1->dp[i + 16] = l;
11548
                l = h;
11549
                h = 0;
11550
            }
11551
        }
11552
        z1->dp[32] += l;
11553
        /* z1 = z1 - z0 - z2 */
11554
        l = 0;
11555
        h = 0;
11556
        for (i = 0; i < 32; i++) {
11557
            l += z1->dp[i];
11558
            SP_ASM_SUBB(l, h, z0->dp[i]);
11559
            SP_ASM_SUBB(l, h, z2->dp[i]);
11560
            z1->dp[i] = l;
11561
            l = h;
11562
            h = 0;
11563
        }
11564
        z1->dp[i] += l;
11565
        /* r += z1 << 16 */
11566
        l = 0;
11567
        h = 0;
11568
        for (i = 0; i < 16; i++) {
11569
            SP_ASM_ADDC(l, h, r->dp[i + 16]);
11570
            SP_ASM_ADDC(l, h, z1->dp[i]);
11571
            r->dp[i + 16] = l;
11572
            l = h;
11573
            h = 0;
11574
        }
11575
        for (; i < 33; i++) {
11576
            SP_ASM_ADDC(l, h, z1->dp[i]);
11577
            r->dp[i + 16] = l;
11578
            l = h;
11579
            h = 0;
11580
        }
11581
        /* r += z2 << 32  */
11582
        l = 0;
11583
        h = 0;
11584
        for (i = 0; i < 17; i++) {
11585
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
11586
            SP_ASM_ADDC(l, h, z2->dp[i]);
11587
            r->dp[i + 32] = l;
11588
            l = h;
11589
            h = 0;
11590
        }
11591
        for (; i < 32; i++) {
11592
            SP_ASM_ADDC(l, h, z2->dp[i]);
11593
            r->dp[i + 32] = l;
11594
            l = h;
11595
            h = 0;
11596
        }
11597
        r->used = 64;
11598
        sp_clamp(r);
11599
    }
11600
11601
    FREE_SP_INT_ARRAY(z, NULL);
11602
    FREE_SP_INT_ARRAY(t, NULL);
11603
    return err;
11604
}
11605
    #endif /* SP_INT_DIGITS >= 64 */
11606
11607
    #if SP_INT_DIGITS >= 96
11608
/* Multiply a by b and store in r: r = a * b
11609
 *
11610
 * Karatsuba implementation.
11611
 *
11612
 * @param [in]  a  SP integer to multiply.
11613
 * @param [in]  b  SP integer to multiply.
11614
 * @param [out] r  SP integer result.
11615
 *
11616
 * @return  MP_OKAY on success.
11617
 * @return  MP_MEM when dynamic memory allocation fails.
11618
 */
11619
static int _sp_mul_48(const sp_int* a, const sp_int* b, sp_int* r)
11620
{
11621
    int err = MP_OKAY;
11622
    unsigned int i;
11623
    sp_int_digit l;
11624
    sp_int_digit h;
11625
    sp_int* a1;
11626
    sp_int* b1;
11627
    sp_int* z0;
11628
    sp_int* z1;
11629
    sp_int* z2;
11630
    sp_int_digit ca;
11631
    sp_int_digit cb;
11632
    DECL_SP_INT_ARRAY(t, 24, 2);
11633
    DECL_SP_INT_ARRAY(z, 49, 2);
11634
11635
    ALLOC_SP_INT_ARRAY(t, 24, 2, err, NULL);
11636
    ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
11637
    if (err == MP_OKAY) {
11638
        a1 = t[0];
11639
        b1 = t[1];
11640
        z1 = z[0];
11641
        z2 = z[1];
11642
        z0 = r;
11643
11644
        XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
11645
        a1->used = 24;
11646
        XMEMCPY(b1->dp, &b->dp[24], sizeof(sp_int_digit) * 24);
11647
        b1->used = 24;
11648
11649
        /* z2 = a1 * b1 */
11650
        err = _sp_mul_24(a1, b1, z2);
11651
    }
11652
    if (err == MP_OKAY) {
11653
        l = a1->dp[0];
11654
        h = 0;
11655
        SP_ASM_ADDC(l, h, a->dp[0]);
11656
        a1->dp[0] = l;
11657
        l = h;
11658
        h = 0;
11659
        for (i = 1; i < 24; i++) {
11660
            SP_ASM_ADDC(l, h, a1->dp[i]);
11661
            SP_ASM_ADDC(l, h, a->dp[i]);
11662
            a1->dp[i] = l;
11663
            l = h;
11664
            h = 0;
11665
        }
11666
        ca = l;
11667
        /* b01 = b0 + b1 */
11668
        l = b1->dp[0];
11669
        h = 0;
11670
        SP_ASM_ADDC(l, h, b->dp[0]);
11671
        b1->dp[0] = l;
11672
        l = h;
11673
        h = 0;
11674
        for (i = 1; i < 24; i++) {
11675
            SP_ASM_ADDC(l, h, b1->dp[i]);
11676
            SP_ASM_ADDC(l, h, b->dp[i]);
11677
            b1->dp[i] = l;
11678
            l = h;
11679
            h = 0;
11680
        }
11681
        cb = l;
11682
11683
        /* z0 = a0 * b0 */
11684
        err = _sp_mul_24(a, b, z0);
11685
    }
11686
    if (err == MP_OKAY) {
11687
        /* z1 = (a0 + a1) * (b0 + b1) */
11688
        err = _sp_mul_24(a1, b1, z1);
11689
    }
11690
    if (err == MP_OKAY) {
11691
        /* r = (z2 << 48) + (z1 - z0 - z2) << 24) + z0 */
11692
        /* r = z0 */
11693
        /* r += (z1 - z0 - z2) << 24 */
11694
        z1->dp[48] = ca & cb;
11695
        l = 0;
11696
        if (ca) {
11697
            h = 0;
11698
            for (i = 0; i < 24; i++) {
11699
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
11700
                SP_ASM_ADDC(l, h, b1->dp[i]);
11701
                z1->dp[i + 24] = l;
11702
                l = h;
11703
                h = 0;
11704
            }
11705
        }
11706
        z1->dp[48] += l;
11707
        l = 0;
11708
        if (cb) {
11709
            h = 0;
11710
            for (i = 0; i < 24; i++) {
11711
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
11712
                SP_ASM_ADDC(l, h, a1->dp[i]);
11713
                z1->dp[i + 24] = l;
11714
                l = h;
11715
                h = 0;
11716
            }
11717
        }
11718
        z1->dp[48] += l;
11719
        /* z1 = z1 - z0 - z2 */
11720
        l = 0;
11721
        h = 0;
11722
        for (i = 0; i < 48; i++) {
11723
            l += z1->dp[i];
11724
            SP_ASM_SUBB(l, h, z0->dp[i]);
11725
            SP_ASM_SUBB(l, h, z2->dp[i]);
11726
            z1->dp[i] = l;
11727
            l = h;
11728
            h = 0;
11729
        }
11730
        z1->dp[i] += l;
11731
        /* r += z1 << 24 */
11732
        l = 0;
11733
        h = 0;
11734
        for (i = 0; i < 24; i++) {
11735
            SP_ASM_ADDC(l, h, r->dp[i + 24]);
11736
            SP_ASM_ADDC(l, h, z1->dp[i]);
11737
            r->dp[i + 24] = l;
11738
            l = h;
11739
            h = 0;
11740
        }
11741
        for (; i < 49; i++) {
11742
            SP_ASM_ADDC(l, h, z1->dp[i]);
11743
            r->dp[i + 24] = l;
11744
            l = h;
11745
            h = 0;
11746
        }
11747
        /* r += z2 << 48  */
11748
        l = 0;
11749
        h = 0;
11750
        for (i = 0; i < 25; i++) {
11751
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
11752
            SP_ASM_ADDC(l, h, z2->dp[i]);
11753
            r->dp[i + 48] = l;
11754
            l = h;
11755
            h = 0;
11756
        }
11757
        for (; i < 48; i++) {
11758
            SP_ASM_ADDC(l, h, z2->dp[i]);
11759
            r->dp[i + 48] = l;
11760
            l = h;
11761
            h = 0;
11762
        }
11763
        r->used = 96;
11764
        sp_clamp(r);
11765
    }
11766
11767
    FREE_SP_INT_ARRAY(z, NULL);
11768
    FREE_SP_INT_ARRAY(t, NULL);
11769
    return err;
11770
}
11771
    #endif /* SP_INT_DIGITS >= 96 */
11772
11773
    #if SP_INT_DIGITS >= 128
11774
/* Multiply a by b and store in r: r = a * b
11775
 *
11776
 * Karatsuba implementation.
11777
 *
11778
 * @param [in]  a  SP integer to multiply.
11779
 * @param [in]  b  SP integer to multiply.
11780
 * @param [out] r  SP integer result.
11781
 *
11782
 * @return  MP_OKAY on success.
11783
 * @return  MP_MEM when dynamic memory allocation fails.
11784
 */
11785
static int _sp_mul_64(const sp_int* a, const sp_int* b, sp_int* r)
11786
{
11787
    int err = MP_OKAY;
11788
    unsigned int i;
11789
    sp_int_digit l;
11790
    sp_int_digit h;
11791
    sp_int* a1;
11792
    sp_int* b1;
11793
    sp_int* z0;
11794
    sp_int* z1;
11795
    sp_int* z2;
11796
    sp_int_digit ca;
11797
    sp_int_digit cb;
11798
    DECL_SP_INT_ARRAY(t, 32, 2);
11799
    DECL_SP_INT_ARRAY(z, 65, 2);
11800
11801
    ALLOC_SP_INT_ARRAY(t, 32, 2, err, NULL);
11802
    ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
11803
    if (err == MP_OKAY) {
11804
        a1 = t[0];
11805
        b1 = t[1];
11806
        z1 = z[0];
11807
        z2 = z[1];
11808
        z0 = r;
11809
11810
        XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
11811
        a1->used = 32;
11812
        XMEMCPY(b1->dp, &b->dp[32], sizeof(sp_int_digit) * 32);
11813
        b1->used = 32;
11814
11815
        /* z2 = a1 * b1 */
11816
        err = _sp_mul_32(a1, b1, z2);
11817
    }
11818
    if (err == MP_OKAY) {
11819
        l = a1->dp[0];
11820
        h = 0;
11821
        SP_ASM_ADDC(l, h, a->dp[0]);
11822
        a1->dp[0] = l;
11823
        l = h;
11824
        h = 0;
11825
        for (i = 1; i < 32; i++) {
11826
            SP_ASM_ADDC(l, h, a1->dp[i]);
11827
            SP_ASM_ADDC(l, h, a->dp[i]);
11828
            a1->dp[i] = l;
11829
            l = h;
11830
            h = 0;
11831
        }
11832
        ca = l;
11833
        /* b01 = b0 + b1 */
11834
        l = b1->dp[0];
11835
        h = 0;
11836
        SP_ASM_ADDC(l, h, b->dp[0]);
11837
        b1->dp[0] = l;
11838
        l = h;
11839
        h = 0;
11840
        for (i = 1; i < 32; i++) {
11841
            SP_ASM_ADDC(l, h, b1->dp[i]);
11842
            SP_ASM_ADDC(l, h, b->dp[i]);
11843
            b1->dp[i] = l;
11844
            l = h;
11845
            h = 0;
11846
        }
11847
        cb = l;
11848
11849
        /* z0 = a0 * b0 */
11850
        err = _sp_mul_32(a, b, z0);
11851
    }
11852
    if (err == MP_OKAY) {
11853
        /* z1 = (a0 + a1) * (b0 + b1) */
11854
        err = _sp_mul_32(a1, b1, z1);
11855
    }
11856
    if (err == MP_OKAY) {
11857
        /* r = (z2 << 64) + (z1 - z0 - z2) << 32) + z0 */
11858
        /* r = z0 */
11859
        /* r += (z1 - z0 - z2) << 32 */
11860
        z1->dp[64] = ca & cb;
11861
        l = 0;
11862
        if (ca) {
11863
            h = 0;
11864
            for (i = 0; i < 32; i++) {
11865
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
11866
                SP_ASM_ADDC(l, h, b1->dp[i]);
11867
                z1->dp[i + 32] = l;
11868
                l = h;
11869
                h = 0;
11870
            }
11871
        }
11872
        z1->dp[64] += l;
11873
        l = 0;
11874
        if (cb) {
11875
            h = 0;
11876
            for (i = 0; i < 32; i++) {
11877
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
11878
                SP_ASM_ADDC(l, h, a1->dp[i]);
11879
                z1->dp[i + 32] = l;
11880
                l = h;
11881
                h = 0;
11882
            }
11883
        }
11884
        z1->dp[64] += l;
11885
        /* z1 = z1 - z0 - z2 */
11886
        l = 0;
11887
        h = 0;
11888
        for (i = 0; i < 64; i++) {
11889
            l += z1->dp[i];
11890
            SP_ASM_SUBB(l, h, z0->dp[i]);
11891
            SP_ASM_SUBB(l, h, z2->dp[i]);
11892
            z1->dp[i] = l;
11893
            l = h;
11894
            h = 0;
11895
        }
11896
        z1->dp[i] += l;
11897
        /* r += z1 << 32 */
11898
        l = 0;
11899
        h = 0;
11900
        for (i = 0; i < 32; i++) {
11901
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
11902
            SP_ASM_ADDC(l, h, z1->dp[i]);
11903
            r->dp[i + 32] = l;
11904
            l = h;
11905
            h = 0;
11906
        }
11907
        for (; i < 65; i++) {
11908
            SP_ASM_ADDC(l, h, z1->dp[i]);
11909
            r->dp[i + 32] = l;
11910
            l = h;
11911
            h = 0;
11912
        }
11913
        /* r += z2 << 64  */
11914
        l = 0;
11915
        h = 0;
11916
        for (i = 0; i < 33; i++) {
11917
            SP_ASM_ADDC(l, h, r->dp[i + 64]);
11918
            SP_ASM_ADDC(l, h, z2->dp[i]);
11919
            r->dp[i + 64] = l;
11920
            l = h;
11921
            h = 0;
11922
        }
11923
        for (; i < 64; i++) {
11924
            SP_ASM_ADDC(l, h, z2->dp[i]);
11925
            r->dp[i + 64] = l;
11926
            l = h;
11927
            h = 0;
11928
        }
11929
        r->used = 128;
11930
        sp_clamp(r);
11931
    }
11932
11933
    FREE_SP_INT_ARRAY(z, NULL);
11934
    FREE_SP_INT_ARRAY(t, NULL);
11935
    return err;
11936
}
11937
    #endif /* SP_INT_DIGITS >= 128 */
11938
11939
    #if SP_INT_DIGITS >= 192
11940
/* Multiply a by b and store in r: r = a * b
11941
 *
11942
 * Karatsuba implementation.
11943
 *
11944
 * @param [in]  a  SP integer to multiply.
11945
 * @param [in]  b  SP integer to multiply.
11946
 * @param [out] r  SP integer result.
11947
 *
11948
 * @return  MP_OKAY on success.
11949
 * @return  MP_MEM when dynamic memory allocation fails.
11950
 */
11951
static int _sp_mul_96(const sp_int* a, const sp_int* b, sp_int* r)
11952
{
11953
    int err = MP_OKAY;
11954
    unsigned int i;
11955
    sp_int_digit l;
11956
    sp_int_digit h;
11957
    sp_int* a1;
11958
    sp_int* b1;
11959
    sp_int* z0;
11960
    sp_int* z1;
11961
    sp_int* z2;
11962
    sp_int_digit ca;
11963
    sp_int_digit cb;
11964
    DECL_SP_INT_ARRAY(t, 48, 2);
11965
    DECL_SP_INT_ARRAY(z, 97, 2);
11966
11967
    ALLOC_SP_INT_ARRAY(t, 48, 2, err, NULL);
11968
    ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
11969
    if (err == MP_OKAY) {
11970
        a1 = t[0];
11971
        b1 = t[1];
11972
        z1 = z[0];
11973
        z2 = z[1];
11974
        z0 = r;
11975
11976
        XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
11977
        a1->used = 48;
11978
        XMEMCPY(b1->dp, &b->dp[48], sizeof(sp_int_digit) * 48);
11979
        b1->used = 48;
11980
11981
        /* z2 = a1 * b1 */
11982
        err = _sp_mul_48(a1, b1, z2);
11983
    }
11984
    if (err == MP_OKAY) {
11985
        l = a1->dp[0];
11986
        h = 0;
11987
        SP_ASM_ADDC(l, h, a->dp[0]);
11988
        a1->dp[0] = l;
11989
        l = h;
11990
        h = 0;
11991
        for (i = 1; i < 48; i++) {
11992
            SP_ASM_ADDC(l, h, a1->dp[i]);
11993
            SP_ASM_ADDC(l, h, a->dp[i]);
11994
            a1->dp[i] = l;
11995
            l = h;
11996
            h = 0;
11997
        }
11998
        ca = l;
11999
        /* b01 = b0 + b1 */
12000
        l = b1->dp[0];
12001
        h = 0;
12002
        SP_ASM_ADDC(l, h, b->dp[0]);
12003
        b1->dp[0] = l;
12004
        l = h;
12005
        h = 0;
12006
        for (i = 1; i < 48; i++) {
12007
            SP_ASM_ADDC(l, h, b1->dp[i]);
12008
            SP_ASM_ADDC(l, h, b->dp[i]);
12009
            b1->dp[i] = l;
12010
            l = h;
12011
            h = 0;
12012
        }
12013
        cb = l;
12014
12015
        /* z0 = a0 * b0 */
12016
        err = _sp_mul_48(a, b, z0);
12017
    }
12018
    if (err == MP_OKAY) {
12019
        /* z1 = (a0 + a1) * (b0 + b1) */
12020
        err = _sp_mul_48(a1, b1, z1);
12021
    }
12022
    if (err == MP_OKAY) {
12023
        /* r = (z2 << 96) + (z1 - z0 - z2) << 48) + z0 */
12024
        /* r = z0 */
12025
        /* r += (z1 - z0 - z2) << 48 */
12026
        z1->dp[96] = ca & cb;
12027
        l = 0;
12028
        if (ca) {
12029
            h = 0;
12030
            for (i = 0; i < 48; i++) {
12031
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
12032
                SP_ASM_ADDC(l, h, b1->dp[i]);
12033
                z1->dp[i + 48] = l;
12034
                l = h;
12035
                h = 0;
12036
            }
12037
        }
12038
        z1->dp[96] += l;
12039
        l = 0;
12040
        if (cb) {
12041
            h = 0;
12042
            for (i = 0; i < 48; i++) {
12043
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
12044
                SP_ASM_ADDC(l, h, a1->dp[i]);
12045
                z1->dp[i + 48] = l;
12046
                l = h;
12047
                h = 0;
12048
            }
12049
        }
12050
        z1->dp[96] += l;
12051
        /* z1 = z1 - z0 - z2 */
12052
        l = 0;
12053
        h = 0;
12054
        for (i = 0; i < 96; i++) {
12055
            l += z1->dp[i];
12056
            SP_ASM_SUBB(l, h, z0->dp[i]);
12057
            SP_ASM_SUBB(l, h, z2->dp[i]);
12058
            z1->dp[i] = l;
12059
            l = h;
12060
            h = 0;
12061
        }
12062
        z1->dp[i] += l;
12063
        /* r += z1 << 48 */
12064
        l = 0;
12065
        h = 0;
12066
        for (i = 0; i < 48; i++) {
12067
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
12068
            SP_ASM_ADDC(l, h, z1->dp[i]);
12069
            r->dp[i + 48] = l;
12070
            l = h;
12071
            h = 0;
12072
        }
12073
        for (; i < 97; i++) {
12074
            SP_ASM_ADDC(l, h, z1->dp[i]);
12075
            r->dp[i + 48] = l;
12076
            l = h;
12077
            h = 0;
12078
        }
12079
        /* r += z2 << 96  */
12080
        l = 0;
12081
        h = 0;
12082
        for (i = 0; i < 49; i++) {
12083
            SP_ASM_ADDC(l, h, r->dp[i + 96]);
12084
            SP_ASM_ADDC(l, h, z2->dp[i]);
12085
            r->dp[i + 96] = l;
12086
            l = h;
12087
            h = 0;
12088
        }
12089
        for (; i < 96; i++) {
12090
            SP_ASM_ADDC(l, h, z2->dp[i]);
12091
            r->dp[i + 96] = l;
12092
            l = h;
12093
            h = 0;
12094
        }
12095
        r->used = 192;
12096
        sp_clamp(r);
12097
    }
12098
12099
    FREE_SP_INT_ARRAY(z, NULL);
12100
    FREE_SP_INT_ARRAY(t, NULL);
12101
    return err;
12102
}
12103
    #endif /* SP_INT_DIGITS >= 192 */
12104
12105
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
12106
#endif /* !WOLFSSL_SP_SMALL */
12107
12108
/* Multiply a by b and store in r: r = a * b
12109
 *
12110
 * @param [in]  a  SP integer to multiply.
12111
 * @param [in]  b  SP integer to multiply.
12112
 * @param [out] r  SP integer result.
12113
 *
12114
 * @return  MP_OKAY on success.
12115
 * @return  MP_VAL when a, b or r is NULL; or the result will be too big for
12116
 *          fixed data length.
12117
 * @return  MP_MEM when dynamic memory allocation fails.
12118
 */
12119
int sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
12120
0
{
12121
0
    int err = MP_OKAY;
12122
#ifdef WOLFSSL_SP_INT_NEGATIVE
12123
    sp_uint8 sign = MP_ZPOS;
12124
#endif
12125
12126
0
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
12127
0
        err = MP_VAL;
12128
0
    }
12129
12130
    /* Need extra digit during calculation. */
12131
    /* NOLINTBEGIN(clang-analyzer-core.UndefinedBinaryOperatorResult) */
12132
    /* clang-tidy falsely believes that r->size was corrupted by the _sp_copy()
12133
     * to "Copy base into working variable" in _sp_exptmod_ex().
12134
     */
12135
0
    if ((err == MP_OKAY) && (a->used + b->used > r->size)) {
12136
0
        err = MP_VAL;
12137
0
    }
12138
    /* NOLINTEND(clang-analyzer-core.UndefinedBinaryOperatorResult) */
12139
12140
#if 0
12141
    if (err == MP_OKAY) {
12142
        sp_print(a, "a");
12143
        sp_print(b, "b");
12144
    }
12145
#endif
12146
12147
0
    if (err == MP_OKAY) {
12148
    #ifdef WOLFSSL_SP_INT_NEGATIVE
12149
        sign = a->sign ^ b->sign;
12150
    #endif
12151
12152
0
        if ((a->used == 0) || (b->used == 0)) {
12153
0
            _sp_zero(r);
12154
0
        }
12155
0
        else
12156
0
#ifndef WOLFSSL_SP_SMALL
12157
0
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
12158
0
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
12159
0
        if ((a->used == 4) && (b->used == 4)) {
12160
0
            err = _sp_mul_4(a, b, r);
12161
0
        }
12162
0
        else
12163
0
#endif /* SP_WORD_SIZE == 64 */
12164
0
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
12165
0
#ifdef SQR_MUL_ASM
12166
0
        if ((a->used == 6) && (b->used == 6)) {
12167
0
            err = _sp_mul_6(a, b, r);
12168
0
        }
12169
0
        else
12170
0
#endif /* SQR_MUL_ASM */
12171
0
#endif /* SP_WORD_SIZE == 64 */
12172
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
12173
#ifdef SQR_MUL_ASM
12174
        if ((a->used == 8) && (b->used == 8)) {
12175
            err = _sp_mul_8(a, b, r);
12176
        }
12177
        else
12178
#endif /* SQR_MUL_ASM */
12179
#endif /* SP_WORD_SIZE == 32 */
12180
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
12181
#ifdef SQR_MUL_ASM
12182
        if ((a->used == 12) && (b->used == 12)) {
12183
            err = _sp_mul_12(a, b, r);
12184
        }
12185
        else
12186
#endif /* SQR_MUL_ASM */
12187
#endif /* SP_WORD_SIZE == 32 */
12188
0
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
12189
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
12190
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
12191
    (SP_WORD_SIZE == 64)))
12192
    #if SP_INT_DIGITS >= 32
12193
        if ((a->used == 16) && (b->used == 16)) {
12194
            err = _sp_mul_16(a, b, r);
12195
        }
12196
        else
12197
    #endif /* SP_INT_DIGITS >= 32 */
12198
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
12199
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
12200
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
12201
    #if SP_INT_DIGITS >= 48
12202
        if ((a->used == 24) && (b->used == 24)) {
12203
            err = _sp_mul_24(a, b, r);
12204
        }
12205
        else
12206
    #endif /* SP_INT_DIGITS >= 48 */
12207
    #if SP_INT_DIGITS >= 64
12208
        if ((a->used == 32) && (b->used == 32)) {
12209
            err = _sp_mul_32(a, b, r);
12210
        }
12211
        else
12212
    #endif /* SP_INT_DIGITS >= 64 */
12213
    #if SP_INT_DIGITS >= 96
12214
        if ((a->used == 48) && (b->used == 48)) {
12215
            err = _sp_mul_48(a, b, r);
12216
        }
12217
        else
12218
    #endif /* SP_INT_DIGITS >= 96 */
12219
    #if SP_INT_DIGITS >= 128
12220
        if ((a->used == 64) && (b->used == 64)) {
12221
            err = _sp_mul_64(a, b, r);
12222
        }
12223
        else
12224
    #endif /* SP_INT_DIGITS >= 128 */
12225
    #if SP_INT_DIGITS >= 192
12226
        if ((a->used == 96) && (b->used == 96)) {
12227
            err = _sp_mul_96(a, b, r);
12228
        }
12229
        else
12230
    #endif /* SP_INT_DIGITS >= 192 */
12231
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
12232
0
#endif /* !WOLFSSL_SP_SMALL */
12233
12234
0
#ifdef SQR_MUL_ASM
12235
0
        if (a->used == b->used) {
12236
0
            err = _sp_mul_nxn(a, b, r);
12237
0
        }
12238
0
        else
12239
0
#endif
12240
0
        {
12241
0
            err = _sp_mul(a, b, r);
12242
0
        }
12243
0
    }
12244
12245
#ifdef WOLFSSL_SP_INT_NEGATIVE
12246
    if (err == MP_OKAY) {
12247
        r->sign = (r->used == 0) ? MP_ZPOS : sign;
12248
    }
12249
#endif
12250
12251
#if 0
12252
    if (err == MP_OKAY) {
12253
        sp_print(r, "rmul");
12254
    }
12255
#endif
12256
12257
0
    return err;
12258
0
}
12259
/* END SP_MUL implementations. */
12260
12261
#endif
12262
12263
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
12264
    defined(WOLFCRYPT_HAVE_ECCSI) || \
12265
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || defined(OPENSSL_ALL)
12266
/* Multiply a by b mod m and store in r: r = (a * b) mod m
12267
 *
12268
 * @param [in]  a  SP integer to multiply.
12269
 * @param [in]  b  SP integer to multiply.
12270
 * @param [in]  m  SP integer that is the modulus.
12271
 * @param [out] r  SP integer result.
12272
 *
12273
 * @return  MP_OKAY on success.
12274
 * @return  MP_MEM when dynamic memory allocation fails.
12275
 */
12276
static int _sp_mulmod_tmp(const sp_int* a, const sp_int* b, const sp_int* m,
12277
    sp_int* r)
12278
0
{
12279
0
    int err = MP_OKAY;
12280
12281
0
    if (sp_iszero(a) || sp_iszero(b)) {
12282
0
        _sp_zero(r);
12283
0
    }
12284
0
    else {
12285
        /* Create temporary for multiplication result. */
12286
0
        DECL_SP_INT(t, a->used + b->used);
12287
12288
0
        ALLOC_SP_INT(t, a->used + b->used, err, NULL);
12289
0
        if (err == MP_OKAY) {
12290
0
            err = sp_init_size(t, (sp_size_t)(a->used + b->used));
12291
0
        }
12292
12293
        /* Multiply and reduce. */
12294
0
        if (err == MP_OKAY) {
12295
0
            err = sp_mul(a, b, t);
12296
0
        }
12297
0
        if (err == MP_OKAY) {
12298
0
            err = sp_mod(t, m, r);
12299
0
        }
12300
12301
        /* Dispose of an allocated SP int. */
12302
0
        FREE_SP_INT(t, NULL);
12303
0
    }
12304
12305
0
    return err;
12306
0
}
12307
12308
/* Multiply a by b mod m and store in r: r = (a * b) mod m
12309
 *
12310
 * @param [in]  a  SP integer to multiply.
12311
 * @param [in]  b  SP integer to multiply.
12312
 * @param [in]  m  SP integer that is the modulus.
12313
 * @param [out] r  SP integer result.
12314
 *
12315
 * @return  MP_OKAY on success.
12316
 * @return  MP_MEM when dynamic memory allocation fails.
12317
 */
12318
static int _sp_mulmod(const sp_int* a, const sp_int* b, const sp_int* m,
12319
    sp_int* r)
12320
0
{
12321
0
    int err = MP_OKAY;
12322
12323
    /* Use r as intermediate result if not same as pointer m which is needed
12324
     * after first intermediate result.
12325
     */
12326
0
    if (r != m) {
12327
        /* Multiply and reduce. */
12328
0
        err = sp_mul(a, b, r);
12329
0
        if (err == MP_OKAY) {
12330
0
            err = sp_mod(r, m, r);
12331
0
        }
12332
0
    }
12333
0
    else {
12334
        /* Do operation using temporary. */
12335
0
        err = _sp_mulmod_tmp(a, b, m, r);
12336
0
    }
12337
12338
0
    return err;
12339
0
}
12340
12341
/* Multiply a by b mod m and store in r: r = (a * b) mod m
12342
 *
12343
 * @param [in]  a  SP integer to multiply.
12344
 * @param [in]  b  SP integer to multiply.
12345
 * @param [in]  m  SP integer that is the modulus.
12346
 * @param [out] r  SP integer result.
12347
 *
12348
 * @return  MP_OKAY on success.
12349
 * @return  MP_VAL when a, b, m or r is NULL; m is 0; or a * b is too big for
12350
 *          fixed data length.
12351
 * @return  MP_MEM when dynamic memory allocation fails.
12352
 */
12353
int sp_mulmod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
12354
0
{
12355
0
    int err = MP_OKAY;
12356
12357
    /* Validate parameters. */
12358
0
    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
12359
0
        err = MP_VAL;
12360
0
    }
12361
    /* Ensure result SP int is big enough for intermediates. */
12362
0
    if ((err == MP_OKAY) && (r != m) && (a->used + b->used > r->size)) {
12363
0
        err = MP_VAL;
12364
0
    }
12365
12366
#if 0
12367
    if (err == 0) {
12368
        sp_print(a, "a");
12369
        sp_print(b, "b");
12370
        sp_print(m, "m");
12371
    }
12372
#endif
12373
12374
0
    if (err == MP_OKAY) {
12375
0
        err = _sp_mulmod(a, b, m, r);
12376
0
    }
12377
12378
#if 0
12379
    if (err == 0) {
12380
        sp_print(r, "rmm");
12381
    }
12382
#endif
12383
12384
0
    return err;
12385
0
}
12386
#endif
12387
12388
#ifdef WOLFSSL_SP_INVMOD
12389
/* Calculates the multiplicative inverse in the field. r*a = x*m + 1
12390
 * Right-shift Algorithm. NOT constant time.
12391
 *
12392
 * Algorithm:
12393
 *   1. u = m, v = a, b = 0, c = 1
12394
 *   2. While v != 1 and u != 0
12395
 *     2.1. If u even
12396
 *       2.1.1. u /= 2
12397
 *       2.1.2. b = (b / 2) mod m
12398
 *     2.2. Else if v even
12399
 *       2.2.1. v /= 2
12400
 *       2.2.2. c = (c / 2) mod m
12401
 *     2.3. Else if u >= v
12402
 *       2.3.1. u -= v
12403
 *       2.3.2. b = (b - c) mod m
12404
 *     2.4. Else (v > u)
12405
 *       2.4.1. v -= u
12406
 *       2.4.2. c = (c - b) mod m
12407
 *  3. NO_INVERSE if u == 0
12408
 *
12409
 * @param [in]      a  SP integer to find inverse of.
12410
 * @param [in]      m  SP integer that is the modulus.
12411
 * @param [in, out] u  SP integer to use in calculation.
12412
 * @param [in, out] v  SP integer to use in calculation.
12413
 * @param [in, out] b  SP integer to use in calculation.
12414
 * @param [in, out] c  SP integer that is the inverse.
12415
 *
12416
 * @return  MP_OKAY on success.
12417
 * @return  MP_VAL when no inverse.
12418
 */
12419
static int _sp_invmod_bin(const sp_int* a, const sp_int* m, sp_int* u,
12420
    sp_int* v, sp_int* b, sp_int* c)
12421
0
{
12422
0
    int err = MP_OKAY;
12423
12424
    /* 1. u = m, v = a, b = 0, c = 1 */
12425
0
    _sp_copy(m, u);
12426
0
    if (a != v) {
12427
0
        _sp_copy(a, v);
12428
0
    }
12429
0
    _sp_zero(b);
12430
0
    _sp_set(c, 1);
12431
12432
    /* 2. While v != 1 and u != 0 */
12433
0
    while (!sp_isone(v) && !sp_iszero(u)) {
12434
        /* 2.1. If u even */
12435
0
        if ((u->dp[0] & 1) == 0) {
12436
            /* 2.1.1. u /= 2 */
12437
0
            _sp_div_2(u, u);
12438
            /* 2.1.2. b = (b / 2) mod m */
12439
0
            if (sp_isodd(b)) {
12440
0
                _sp_add_off(b, m, b, 0);
12441
0
            }
12442
0
            _sp_div_2(b, b);
12443
0
        }
12444
        /* 2.2. Else if v even */
12445
0
        else if ((v->dp[0] & 1) == 0) {
12446
            /* 2.2.1. v /= 2 */
12447
0
            _sp_div_2(v, v);
12448
            /* 2.2.2. c = (c / 2) mod m */
12449
0
            if (sp_isodd(c)) {
12450
0
                _sp_add_off(c, m, c, 0);
12451
0
            }
12452
0
            _sp_div_2(c, c);
12453
0
        }
12454
        /* 2.3. Else if u >= v */
12455
0
        else if (_sp_cmp_abs(u, v) != MP_LT) {
12456
            /* 2.3.1. u -= v */
12457
0
            _sp_sub_off(u, v, u, 0);
12458
            /* 2.3.2. b = (b - c) mod m */
12459
0
            if (_sp_cmp_abs(b, c) == MP_LT) {
12460
0
                _sp_add_off(b, m, b, 0);
12461
0
            }
12462
0
            _sp_sub_off(b, c, b, 0);
12463
0
        }
12464
        /* 2.4. Else (v > u) */
12465
0
        else {
12466
            /* 2.4.1. v -= u */
12467
0
            _sp_sub_off(v, u, v, 0);
12468
            /* 2.4.2. c = (c - b) mod m */
12469
0
            if (_sp_cmp_abs(c, b) == MP_LT) {
12470
0
                _sp_add_off(c, m, c, 0);
12471
0
            }
12472
0
            _sp_sub_off(c, b, c, 0);
12473
0
        }
12474
0
    }
12475
    /* 3. NO_INVERSE if u == 0 */
12476
0
    if (sp_iszero(u)) {
12477
0
        err = MP_VAL;
12478
0
    }
12479
12480
0
    return err;
12481
0
}
12482
12483
#if !defined(WOLFSSL_SP_LOW_MEM) && !defined(WOLFSSL_SP_SMALL) && \
12484
    (!defined(NO_RSA) || !defined(NO_DH))
12485
/* Calculates the multiplicative inverse in the field. r*a = x*m + 1
12486
 * Extended Euclidean Algorithm. NOT constant time.
12487
 *
12488
 * Creates two new SP ints.
12489
 *
12490
 * Algorithm:
12491
 *  1. x = m, y = a, b = 1, c = 0
12492
 *  2. while x > 1
12493
 *   2.1. d = x / y, r = x mod y
12494
 *   2.2. c -= d * b
12495
 *   2.3. x = y, y = r
12496
 *   2.4. s = b, b = c, c = s
12497
 *  3. If y != 0 then NO_INVERSE
12498
 *  4. If c < 0 then c += m
12499
 *  5. inv = c
12500
 *
12501
 * @param [in]      a    SP integer to find inverse of.
12502
 * @param [in]      m    SP integer that is the modulus.
12503
 * @param [in, out] x    SP integer to use in calculation.
12504
 * @param [in, out] y    SP integer to use in calculation.
12505
 * @param [in, out] b    SP integer to use in calculation.
12506
 * @param [in, out] c    SP integer to use in calculation.
12507
 * @param [out]     inv  SP integer that is the inverse.
12508
 *
12509
 * @return  MP_OKAY on success.
12510
 * @return  MP_VAL when no inverse.
12511
 * @return  MP_MEM when dynamic memory allocation fails.
12512
 */
12513
static int _sp_invmod_div(const sp_int* a, const sp_int* m, sp_int* x,
12514
    sp_int* y, sp_int* b, sp_int* c, sp_int* inv)
12515
0
{
12516
0
    int err = MP_OKAY;
12517
0
    sp_int* s;
12518
0
#ifndef WOLFSSL_SP_INT_NEGATIVE
12519
0
    int bneg = 0;
12520
0
    int cneg = 0;
12521
0
    int neg;
12522
0
#endif
12523
0
    DECL_SP_INT(d, m->used + 1);
12524
12525
0
    ALLOC_SP_INT(d, m->used + 1, err, NULL);
12526
0
    if (err == MP_OKAY) {
12527
0
        err = sp_init_size(d, (sp_size_t)(m->used + 1U));
12528
0
    }
12529
12530
0
    if (err == MP_OKAY) {
12531
        /* 1. x = m, y = a, b = 1, c = 0 */
12532
0
        if (a != y) {
12533
0
            _sp_copy(a, y);
12534
0
        }
12535
0
        _sp_copy(m, x);
12536
0
        _sp_set(b, 1);
12537
0
        _sp_zero(c);
12538
0
    }
12539
#ifdef WOLFSSL_SP_INT_NEGATIVE
12540
    /* 2. while x > 1 */
12541
    while ((err == MP_OKAY) && (!sp_isone(x)) && (!sp_iszero(x))) {
12542
        /* 2.1. d = x / y, r = x mod y */
12543
        err = sp_div(x, y, d, x);
12544
        if (err == MP_OKAY) {
12545
            /* 2.2. c -= d * b */
12546
            if (sp_isone(d)) {
12547
                /* c -= 1 * b */
12548
                err = sp_sub(c, b, c);
12549
            }
12550
            else {
12551
                /* d *= b */
12552
                err = sp_mul(d, b, d);
12553
                /* c -= d */
12554
                if (err == MP_OKAY) {
12555
                    err = sp_sub(c, d, c);
12556
                }
12557
            }
12558
            /* 2.3. x = y, y = r */
12559
            s = y; y = x; x = s;
12560
            /* 2.4. s = b, b = c, c = s */
12561
            s = b; b = c; c = s;
12562
        }
12563
    }
12564
    /* 3. If y != 0 then NO_INVERSE */
12565
    if ((err == MP_OKAY) && (!sp_iszero(y))) {
12566
        err = MP_VAL;
12567
    }
12568
    /* 4. If c < 0 then c += m */
12569
    if ((err == MP_OKAY) && sp_isneg(c)) {
12570
        err = sp_add(c, m, c);
12571
    }
12572
    if (err == MP_OKAY) {
12573
        /* 5. inv = c */
12574
        err = sp_copy(c, inv);
12575
    }
12576
#else
12577
    /* 2. while x > 1 */
12578
0
    while ((err == MP_OKAY) && (!sp_isone(x)) && (!sp_iszero(x))) {
12579
        /* 2.1. d = x / y, r = x mod y */
12580
0
        err = sp_div(x, y, d, x);
12581
0
        if (err == MP_OKAY) {
12582
0
            if (sp_isone(d)) {
12583
                /* c -= 1 * b */
12584
0
                if ((bneg ^ cneg) == 1) {
12585
                    /* c -= -b or -c -= b, therefore add. */
12586
0
                    _sp_add_off(c, b, c, 0);
12587
0
                }
12588
0
                else if (_sp_cmp_abs(c, b) == MP_LT) {
12589
                    /* |c| < |b| and same sign, reverse subtract and negate. */
12590
0
                    _sp_sub_off(b, c, c, 0);
12591
0
                    cneg = !cneg;
12592
0
                }
12593
0
                else {
12594
                    /* |c| >= |b| */
12595
0
                    _sp_sub_off(c, b, c, 0);
12596
0
                }
12597
0
            }
12598
0
            else {
12599
                /* d *= b */
12600
0
                err = sp_mul(d, b, d);
12601
                /* c -= d */
12602
0
                if (err == MP_OKAY) {
12603
0
                    if ((bneg ^ cneg) == 1) {
12604
                        /* c -= -d or -c -= d, therefore add. */
12605
0
                        _sp_add_off(c, d, c, 0);
12606
0
                    }
12607
0
                    else if (_sp_cmp_abs(c, d) == MP_LT) {
12608
                        /* |c| < |d| and same sign, reverse subtract and negate.
12609
                         */
12610
0
                        _sp_sub_off(d, c, c, 0);
12611
0
                        cneg = !cneg;
12612
0
                    }
12613
0
                    else {
12614
0
                        _sp_sub_off(c, d, c, 0);
12615
0
                    }
12616
0
                }
12617
0
            }
12618
            /* 2.3. x = y, y = r */
12619
0
            s = y; y = x; x = s;
12620
            /* 2.4. s = b, b = c, c = s */
12621
0
            s = b; b = c; c = s;
12622
0
            neg = bneg; bneg = cneg; cneg = neg;
12623
0
        }
12624
0
    }
12625
    /* 3. If y != 0 then NO_INVERSE */
12626
0
    if ((err == MP_OKAY) && (!sp_iszero(y))) {
12627
0
        err = MP_VAL;
12628
0
    }
12629
    /* 4. If c < 0 then c += m */
12630
0
    if ((err == MP_OKAY) && cneg) {
12631
        /* c = m - |c| */
12632
0
        _sp_sub_off(m, c, c, 0);
12633
0
    }
12634
0
    if (err == MP_OKAY) {
12635
        /* 5. inv = c */
12636
0
        err = sp_copy(c, inv);
12637
0
    }
12638
0
#endif
12639
12640
0
    FREE_SP_INT(d, NULL);
12641
0
    return err;
12642
0
}
12643
#endif
12644
12645
/* Calculates the multiplicative inverse in the field.
12646
 * Right-shift Algorithm or Extended Euclidean Algorithm. NOT constant time.
12647
 *
12648
 * r*a = x*m + 1
12649
 *
12650
 * @param [in]  a  SP integer to find inverse of.
12651
 * @param [in]  m  SP integer that is the modulus.
12652
 * @param [out] r  SP integer to hold result. r cannot be m.
12653
 *
12654
 * @return  MP_OKAY on success.
12655
 * @return  MP_VAL when m is even and a divides m evenly.
12656
 * @return  MP_MEM when dynamic memory allocation fails.
12657
 */
12658
static int _sp_invmod(const sp_int* a, const sp_int* m, sp_int* r)
12659
0
{
12660
0
    int err = MP_OKAY;
12661
0
    sp_int* u = NULL;
12662
0
    sp_int* v = NULL;
12663
0
    sp_int* b = NULL;
12664
0
    DECL_SP_INT_ARRAY(t, m->used + 1, 3);
12665
0
    DECL_SP_INT(c, 2 * m->used + 1);
12666
12667
    /* Allocate SP ints:
12668
     *  - x3 one word larger than modulus
12669
     *  - x1 one word longer than twice modulus used
12670
     */
12671
0
    ALLOC_SP_INT_ARRAY(t, m->used + 1U, 3, err, NULL);
12672
0
    ALLOC_SP_INT(c, 2 * m->used + 1, err, NULL);
12673
0
    if (err == MP_OKAY) {
12674
0
        u = t[0];
12675
0
        v = t[1];
12676
0
        b = t[2];
12677
        /* c allocated separately and larger for even mod case. */
12678
0
    }
12679
12680
    /* Initialize intermediate values with minimal sizes. */
12681
0
    if (err == MP_OKAY) {
12682
0
        err = sp_init_size(u, (sp_size_t)(m->used + 1U));
12683
0
    }
12684
0
    if (err == MP_OKAY) {
12685
0
        err = sp_init_size(v, (sp_size_t)(m->used + 1U));
12686
0
    }
12687
0
    if (err == MP_OKAY) {
12688
0
        err = sp_init_size(b, (sp_size_t)(m->used + 1U));
12689
0
    }
12690
0
    if (err == MP_OKAY) {
12691
0
        err = sp_init_size(c, (sp_size_t)(2U * m->used + 1U));
12692
0
    }
12693
12694
0
    if (err == MP_OKAY) {
12695
0
        const sp_int* mm = m;
12696
0
        const sp_int* ma = a;
12697
0
        int evenMod = 0;
12698
12699
0
        if (sp_iseven(m)) {
12700
            /* a^-1 mod m = m + ((1 - m*(m^-1 % a)) / a) */
12701
0
            mm = a;
12702
0
            ma = v;
12703
0
            _sp_copy(a, u);
12704
0
            err = sp_mod(m, a, v);
12705
            /* v == 0 when a divides m evenly - no inverse.  */
12706
0
            if ((err == MP_OKAY) && sp_iszero(v)) {
12707
0
                err = MP_VAL;
12708
0
            }
12709
0
            evenMod = 1;
12710
0
        }
12711
12712
0
        if (err == MP_OKAY) {
12713
            /* Calculate inverse. */
12714
0
        #if !defined(WOLFSSL_SP_LOW_MEM) && !defined(WOLFSSL_SP_SMALL) && \
12715
0
            (!defined(NO_RSA) || !defined(NO_DH))
12716
0
            if (sp_count_bits(mm) >= 1024) {
12717
0
                err = _sp_invmod_div(ma, mm, u, v, b, c, c);
12718
0
            }
12719
0
            else
12720
0
        #endif
12721
0
            {
12722
0
                err = _sp_invmod_bin(ma, mm, u, v, b, c);
12723
0
            }
12724
0
        }
12725
12726
        /* Fixup for even modulus. */
12727
0
        if ((err == MP_OKAY) && evenMod) {
12728
            /* Finish operation.
12729
             *    a^-1 mod m = m + ((1 - m*c) / a)
12730
             * => a^-1 mod m = m - ((m*c - 1) / a)
12731
             */
12732
0
            err = sp_mul(c, m, c);
12733
0
            if (err == MP_OKAY) {
12734
0
                _sp_sub_d(c, 1, c);
12735
0
                err = sp_div(c, a, c, NULL);
12736
0
            }
12737
0
            if (err == MP_OKAY) {
12738
0
                err = sp_sub(m, c, r);
12739
0
            }
12740
0
        }
12741
0
        else if (err == MP_OKAY) {
12742
0
            _sp_copy(c, r);
12743
0
        }
12744
0
    }
12745
12746
0
    FREE_SP_INT(c, NULL);
12747
0
    FREE_SP_INT_ARRAY(t, NULL);
12748
0
    return err;
12749
0
}
12750
12751
/* Calculates the multiplicative inverse in the field.
12752
 * Right-shift Algorithm or Extended Euclidean Algorithm. NOT constant time.
12753
 *
12754
 * r*a = x*m + 1
12755
 *
12756
 * @param [in]  a  SP integer to find inverse of.
12757
 * @param [in]  m  SP integer that is the modulus.
12758
 * @param [out] r  SP integer to hold result. r cannot be m.
12759
 *
12760
 * @return  MP_OKAY on success.
12761
 * @return  MP_VAL when a, m or r is NULL; a or m is zero; a and m are even or
12762
 *          m is negative.
12763
 * @return  MP_MEM when dynamic memory allocation fails.
12764
 */
12765
int sp_invmod(const sp_int* a, const sp_int* m, sp_int* r)
12766
0
{
12767
0
    int err = MP_OKAY;
12768
12769
    /* Validate parameters. */
12770
0
    if ((a == NULL) || (m == NULL) || (r == NULL) || (r == m)) {
12771
0
        err = MP_VAL;
12772
0
    }
12773
0
    if ((err == MP_OKAY) && (m->used * 2 > r->size)) {
12774
0
        err = MP_VAL;
12775
0
    }
12776
12777
#ifdef WOLFSSL_SP_INT_NEGATIVE
12778
    /* Don't support negative modulus. */
12779
    if ((err == MP_OKAY) && (m->sign == MP_NEG)) {
12780
        err = MP_VAL;
12781
    }
12782
#endif
12783
12784
0
    if (err == MP_OKAY) {
12785
        /* Ensure number is less than modulus. */
12786
0
        if (_sp_cmp_abs(a, m) != MP_LT) {
12787
0
            err = sp_mod(a, m, r);
12788
0
            a = r;
12789
0
        }
12790
0
    }
12791
12792
#ifdef WOLFSSL_SP_INT_NEGATIVE
12793
    if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
12794
        /* Make 'a' positive */
12795
        err = sp_add(m, a, r);
12796
        a = r;
12797
    }
12798
#endif
12799
12800
    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1)  */
12801
0
    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m))) {
12802
0
        err = MP_VAL;
12803
0
    }
12804
    /* r*2*x != n*2*y + 1 for integer x,y */
12805
0
    if ((err == MP_OKAY) && sp_iseven(a) && sp_iseven(m)) {
12806
0
        err = MP_VAL;
12807
0
    }
12808
    /* 1*1 = 0*m + 1  */
12809
0
    if ((err == MP_OKAY) && sp_isone(a)) {
12810
0
        _sp_set(r, 1);
12811
0
    }
12812
0
    else if (err == MP_OKAY) {
12813
0
        err = _sp_invmod(a, m, r);
12814
0
    }
12815
12816
0
    return err;
12817
0
}
12818
#endif /* WOLFSSL_SP_INVMOD */
12819
12820
#ifdef WOLFSSL_SP_INVMOD_MONT_CT
12821
12822
/* Number of entries to pre-compute.
12823
 * Many pre-defined primes have multiple of 8 consecutive 1s.
12824
 * P-256 modulus - 2 => 32x1, 31x0, 1x1, 96x0, 94x1, 1x0, 1x1.
12825
 */
12826
0
#define CT_INV_MOD_PRE_CNT      8
12827
12828
/* Calculates the multiplicative inverse in the field - constant time.
12829
 *
12830
 * Modulus (m) must be a prime and greater than 2.
12831
 * For prime m, inv = a ^ (m-2) mod m as 1 = a ^ (m-1) mod m.
12832
 *
12833
 * Algorithm:
12834
 *  pre = pre-computed values, m = modulus, a = value to find inverse of,
12835
 *  e = exponent
12836
 *  Pre-calc:
12837
 *   1. pre[0] = 2^0 * a mod m
12838
 *   2. For i in 1..CT_INV_MOD_PRE_CNT-1
12839
 *    2.1. pre[i] = ((pre[i-1] ^ 2) * a) mod m
12840
 *  Calc inverse:
12841
 *   1. e = m - 2
12842
 *   2. j = Count leading 1's up to CT_INV_MOD_PRE_CNT
12843
 *   3. t = pre[j-1]
12844
 *   4. s = 0
12845
 *   5. j = 0
12846
 *   6. For i index of next top bit..0
12847
 *    6.1. bit = e[i]
12848
 *    6.2. j += bit
12849
 *    6.3. s += 1
12850
 *    6.4. if j == CT_INV_MOD_PRE_CNT or (bit == 0 and j > 0)
12851
 *     6.4.1. s -= 1 - bit
12852
 *     6.4.2. For s downto 1
12853
 *      6.4.2.1. t = (t ^ 2) mod m
12854
 *     6.4.3. s = 1 - bit
12855
 *     6.4.4. t = (t * pre[j-1]) mod m
12856
 *     6.4.5. j = 0
12857
 *   7. For s downto 1
12858
 *    7.1. t = (t ^ 2) mod m
12859
 *   8. If j > 0 then r = (t * pre[j-1]) mod m
12860
 *   9. Else r = t
12861
 *
12862
 * @param [in]  a   SP integer, Montgomery form, to find inverse of.
12863
 * @param [in]  m   SP integer that is the modulus.
12864
 * @param [out] r   SP integer to hold result.
12865
 * @param [in]  mp  SP integer digit that is the bottom digit of inv(-m).
12866
 *
12867
 * @return  MP_OKAY on success.
12868
 * @return  MP_MEM when dynamic memory allocation fails.
12869
 */
12870
static int _sp_invmod_mont_ct(const sp_int* a, const sp_int* m, sp_int* r,
12871
    sp_int_digit mp)
12872
0
{
12873
0
    int err = MP_OKAY;
12874
0
    int i;
12875
0
    int j = 0;
12876
0
    int s = 0;
12877
0
    sp_int* t = NULL;
12878
0
    sp_int* e = NULL;
12879
0
#ifndef WOLFSSL_SP_NO_MALLOC
12880
0
    DECL_DYN_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2);
12881
#else
12882
    DECL_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2);
12883
#endif
12884
12885
0
#ifndef WOLFSSL_SP_NO_MALLOC
12886
0
    ALLOC_DYN_SP_INT_ARRAY(pre, m->used * 2U + 1U, CT_INV_MOD_PRE_CNT + 2, err,
12887
0
        NULL);
12888
#else
12889
    ALLOC_SP_INT_ARRAY(pre, m->used * 2U + 1U, CT_INV_MOD_PRE_CNT + 2, err,
12890
        NULL);
12891
#endif
12892
0
    if (err == MP_OKAY) {
12893
0
        t = pre[CT_INV_MOD_PRE_CNT + 0];
12894
0
        e = pre[CT_INV_MOD_PRE_CNT + 1];
12895
        /* Space for sqr and mul result. */
12896
0
        _sp_init_size(t, (sp_size_t)(m->used * 2 + 1));
12897
        /* e = mod - 2 */
12898
0
        _sp_init_size(e, (sp_size_t)(m->used + 1));
12899
12900
        /* Create pre-computation results: ((2^(1..8))-1).a. */
12901
0
        _sp_init_size(pre[0], (sp_size_t)(m->used * 2 + 1));
12902
        /* 1. pre[0] = 2^0 * a mod m
12903
         *    Start with 1.a = a.
12904
         */
12905
0
        _sp_copy(a, pre[0]);
12906
        /* 2. For i in 1..CT_INV_MOD_PRE_CNT-1
12907
         *    For rest of entries in table.
12908
         */
12909
0
        for (i = 1; (err == MP_OKAY) && (i < CT_INV_MOD_PRE_CNT); i++) {
12910
            /* 2.1 pre[i] = ((pre[i-1] ^ 2) * a) mod m */
12911
            /* Previous value ..1 -> ..10 */
12912
0
            _sp_init_size(pre[i], (sp_size_t)(m->used * 2 + 1));
12913
0
            err = sp_sqr(pre[i-1], pre[i]);
12914
0
            if (err == MP_OKAY) {
12915
0
                err = _sp_mont_red(pre[i], m, mp, 0);
12916
0
            }
12917
            /* ..10 -> ..11 */
12918
0
            if (err == MP_OKAY) {
12919
0
                err = sp_mul(pre[i], a, pre[i]);
12920
0
            }
12921
0
            if (err == MP_OKAY) {
12922
0
                err = _sp_mont_red(pre[i], m, mp, 0);
12923
0
            }
12924
0
        }
12925
0
    }
12926
12927
0
    if (err == MP_OKAY) {
12928
        /* 1. e = m - 2 */
12929
0
        _sp_sub_d(m, 2, e);
12930
        /* 2. j = Count leading 1's up to CT_INV_MOD_PRE_CNT
12931
         *    One or more of the top bits is 1 so count.
12932
         */
12933
0
        for (i = sp_count_bits(e)-2, j = 1; i >= 0; i--, j++) {
12934
0
            if ((!sp_is_bit_set(e, (unsigned int)i)) ||
12935
0
                    (j == CT_INV_MOD_PRE_CNT)) {
12936
0
                break;
12937
0
            }
12938
0
        }
12939
        /* 3. Set tmp to product of leading bits. */
12940
0
        _sp_copy(pre[j-1], t);
12941
12942
        /* 4. s = 0 */
12943
0
        s = 0;
12944
        /* 5. j = 0 */
12945
0
        j = 0;
12946
        /* 6. For i index of next top bit..0
12947
         *    Do remaining bits in exponent.
12948
         */
12949
0
        for (; (err == MP_OKAY) && (i >= 0); i--) {
12950
            /* 6.1. bit = e[i] */
12951
0
            int bit = sp_is_bit_set(e, (unsigned int)i);
12952
12953
            /* 6.2. j += bit
12954
             *      Update count of consecutive 1 bits.
12955
             */
12956
0
            j += bit;
12957
            /* 6.3. s += 1
12958
             *      Update count of squares required.
12959
             */
12960
0
            s++;
12961
12962
            /* 6.4. if j == CT_INV_MOD_PRE_CNT or (bit == 0 and j > 0)
12963
             *      Check if max 1 bits or 0 and have seen at least one 1 bit.
12964
             */
12965
0
            if ((j == CT_INV_MOD_PRE_CNT) || ((!bit) && (j > 0))) {
12966
                /* 6.4.1. s -= 1 - bit */
12967
0
                bit = 1 - bit;
12968
0
                s -= bit;
12969
                /* 6.4.2. For s downto 1
12970
                 *        Do s squares.
12971
                 */
12972
0
                for (; (err == MP_OKAY) && (s > 0); s--) {
12973
                    /* 6.4.2.1. t = (t ^ 2) mod m */
12974
0
                    err = sp_sqr(t, t);
12975
0
                    if (err == MP_OKAY) {
12976
0
                        err = _sp_mont_red(t, m, mp, 0);
12977
0
                    }
12978
0
                }
12979
                /* 6.4.3. s = 1 - bit */
12980
0
                s = bit;
12981
12982
                /* 6.4.4. t = (t * pre[j-1]) mod m */
12983
0
                if (err == MP_OKAY) {
12984
0
                    err = sp_mul(t, pre[j-1], t);
12985
0
                }
12986
0
                if (err == MP_OKAY) {
12987
0
                    err = _sp_mont_red(t, m, mp, 0);
12988
0
                }
12989
                /* 6.4.5. j = 0
12990
                 *        Reset number of 1 bits seen.
12991
                 */
12992
0
                j = 0;
12993
0
            }
12994
0
        }
12995
0
    }
12996
0
    if (err == MP_OKAY) {
12997
        /* 7. For s downto 1
12998
         *    Do s squares - total remaining. */
12999
0
        for (; (err == MP_OKAY) && (s > 0); s--) {
13000
            /* 7.1. t = (t ^ 2) mod m */
13001
0
            err = sp_sqr(t, t);
13002
0
            if (err == MP_OKAY) {
13003
0
                err = _sp_mont_red(t, m, mp, 0);
13004
0
            }
13005
0
        }
13006
0
    }
13007
0
    if (err == MP_OKAY) {
13008
        /* 8. If j > 0 then r = (t * pre[j-1]) mod m */
13009
0
        if (j > 0) {
13010
0
            err = sp_mul(t, pre[j-1], r);
13011
0
            if (err == MP_OKAY) {
13012
0
                err = _sp_mont_red(r, m, mp, 0);
13013
0
            }
13014
0
        }
13015
        /* 9. Else r = t */
13016
0
        else {
13017
0
            _sp_copy(t, r);
13018
0
        }
13019
0
    }
13020
13021
0
#ifndef WOLFSSL_SP_NO_MALLOC
13022
0
    FREE_DYN_SP_INT_ARRAY(pre, NULL);
13023
#else
13024
    FREE_SP_INT_ARRAY(pre, NULL);
13025
#endif
13026
0
    return err;
13027
0
}
13028
13029
/* Calculates the multiplicative inverse in the field - constant time.
13030
 *
13031
 * Modulus (m) must be a prime and greater than 2.
13032
 * For prime m, inv = a ^ (m-2) mod m as 1 = a ^ (m-1) mod m.
13033
 *
13034
 * @param [in]  a   SP integer, Montgomery form, to find inverse of.
13035
 * @param [in]  m   SP integer that is the modulus.
13036
 * @param [out] r   SP integer to hold result.
13037
 * @param [in]  mp  SP integer digit that is the bottom digit of inv(-m).
13038
 *
13039
 * @return  MP_OKAY on success.
13040
 * @return  MP_VAL when a, m or r is NULL; a is 0 or m is less than 3.
13041
 * @return  MP_MEM when dynamic memory allocation fails.
13042
 */
13043
int sp_invmod_mont_ct(const sp_int* a, const sp_int* m, sp_int* r,
13044
    sp_int_digit mp)
13045
0
{
13046
0
    int err = MP_OKAY;
13047
13048
    /* Validate parameters. */
13049
0
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
13050
0
        err = MP_VAL;
13051
0
    }
13052
    /* Ensure m is not too big. */
13053
0
    else if (m->used * 2 >= SP_INT_DIGITS) {
13054
0
        err = MP_VAL;
13055
0
    }
13056
    /* check that r can hold the range of the modulus result */
13057
0
    else if (m->used > r->size) {
13058
0
        err = MP_VAL;
13059
0
    }
13060
13061
    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
13062
0
    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m) ||
13063
0
            ((m->used == 1) && (m->dp[0] < 3)))) {
13064
0
        err = MP_VAL;
13065
0
    }
13066
13067
0
    if (err == MP_OKAY) {
13068
        /* Do operation. */
13069
0
        err = _sp_invmod_mont_ct(a, m, r, mp);
13070
0
    }
13071
13072
0
    return err;
13073
0
}
13074
13075
#endif /* WOLFSSL_SP_INVMOD_MONT_CT */
13076
13077
13078
/**************************
13079
 * Exponentiation functions
13080
 **************************/
13081
13082
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
13083
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
13084
    defined(OPENSSL_ALL)
13085
13086
#ifndef WC_PROTECT_ENCRYPTED_MEM
13087
13088
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13089
 *
13090
 * Processes the exponent one bit at a time.
13091
 * Implementation is constant time and can be cache attack resistant.
13092
 *
13093
 * Algorithm:
13094
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13095
 *  1. s = 0
13096
 *  2. t[0] = b mod m.
13097
 *  3. t[1] = t[0]
13098
 *  4. For i in (bits-1)...0
13099
 *   4.1. t[s] = t[s] ^ 2
13100
 *   4.2. y = e[i]
13101
 *   4.3  j = y & s
13102
 *   4.4  s = s | y
13103
 *   4.5. t[j] = t[j] * b
13104
 *  5. r = t[1]
13105
 *
13106
 * @param [in]  b     SP integer that is the base.
13107
 * @param [in]  e     SP integer that is the exponent.
13108
 * @param [in]  bits  Number of bits in exponent to use. May be greater than
13109
 *                    count of bits in e.
13110
 * @param [in]  m     SP integer that is the modulus.
13111
 * @param [out] r     SP integer to hold result.
13112
 *
13113
 * @return  MP_OKAY on success.
13114
 * @return  MP_MEM when dynamic memory allocation fails.
13115
 */
13116
static int _sp_exptmod_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    /* Set when the reduced base is zero and r has already been written. */
    int done = 0;
    /* 1. s = 0
     *    Selects the working value: t[0] (fake) until the first 1 bit of the
     *    exponent has been seen, t[1] (real) afterwards.
     */
    int s = 0;
    int i;
#if !defined(WC_NO_CACHE_RESISTANT) && !defined(WC_NO_PTR_INT_CAST)
    /* Working value chosen by masking the addresses of t[0] and t[1]. */
    sp_int* sel = NULL;
#endif
#ifdef WC_NO_CACHE_RESISTANT
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 2);
#else
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 3);
#endif

    /* Get memory for the temporaries. */
#ifdef WC_NO_CACHE_RESISTANT
    ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 2, err, NULL);
#else
    /* Extra SP int is the scratch value used when cache resistant. */
    ALLOC_SP_INT_ARRAY(t, 2U * m->used + 1U, 3, err, NULL);
#endif
    if (err == MP_OKAY) {
        /* Each temporary can hold an unreduced product. */
        _sp_init_size(t[0], (sp_size_t)(m->used * 2 + 1));
        _sp_init_size(t[1], (sp_size_t)(m->used * 2 + 1));
    #ifndef WC_NO_CACHE_RESISTANT
        _sp_init_size(t[2], (sp_size_t)(m->used * 2 + 1));
    #endif

        /* 2. t[0] = b mod m */
        if (_sp_cmp_abs(b, m) == MP_LT) {
            /* Base already below the modulus - use as is. */
            _sp_copy(b, t[0]);
        }
        else {
            err = sp_mod(b, m, t[0]);
            /* Base is a multiple of the modulus: result is zero. */
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        /* 3. t[1] = t[0]
         *    Real working value starts as the base.
         */
        _sp_copy(t[0], t[1]);

        /* 4. For i in (bits-1)...0 */
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
#ifdef WC_NO_CACHE_RESISTANT
            /* 4.1. t[s] = t[s] ^ 2 */
            err = sp_sqrmod(t[s], m, t[s]);
            if (err == MP_OKAY) {
                /* 4.2. y = e[i] */
                int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
                /* 4.3. j = y & s */
                int j = y & s;

                /* 4.4. s = s | y */
                s |= y;
                /* 4.5. t[j] = t[j] * b */
                err = _sp_mulmod(t[j], b, m, t[j]);
            }
#else
            /* 4.1. t[s] = t[s] ^ 2
             *      Copy selected value to scratch, square, copy back.
             */
#ifdef WC_NO_PTR_INT_CAST
            _sp_cond_copy(t[0], s^1, t[2], m->used);
            _sp_cond_copy(t[1], s,   t[2], m->used);
#else
            sel = (sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
                            ((size_t)t[1] & sp_off_on_addr[s  ]));
            _sp_copy(sel, t[2]);
#endif
            err = sp_sqrmod(t[2], m, t[2]);
#ifdef WC_NO_PTR_INT_CAST
            _sp_cond_copy(t[2], s^1, t[0], m->used);
            _sp_cond_copy(t[2], s,   t[1], m->used);
#else
            _sp_copy(t[2], sel);
#endif

            if (err == MP_OKAY) {
                /* 4.2. y = e[i] */
                int y = (int)((e->dp[i >> SP_WORD_SHIFT] >>
                               (i & (int)SP_WORD_MASK)) & 1);
                /* 4.3. j = y & s */
                int j = y & s;

                /* 4.4. s = s | y */
                s |= y;
                /* 4.5. t[j] = t[j] * b
                 *      Copy selected value to scratch, multiply, copy back.
                 */
#ifdef WC_NO_PTR_INT_CAST
                _sp_cond_copy(t[0], j^1, t[2], m->used);
                _sp_cond_copy(t[1], j,   t[2], m->used);
#else
                sel = (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                ((size_t)t[1] & sp_off_on_addr[j  ]));
                _sp_copy(sel, t[2]);
#endif
                err = _sp_mulmod(t[2], b, m, t[2]);
#ifdef WC_NO_PTR_INT_CAST
                _sp_cond_copy(t[2], j^1, t[0], m->used);
                _sp_cond_copy(t[2], j,   t[1], m->used);
#else
                _sp_copy(t[2], sel);
#endif
            }
#endif
        }

        /* 5. r = t[1] */
        if (err == MP_OKAY) {
            _sp_copy(t[1], r);
        }
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
13241
13242
#else
13243
13244
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13245
 * Process the exponent one bit at a time using modular multiply and square.
13246
 * Is constant time and cache attack resistant.
13247
 *
13248
 * Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
13249
 * Cryptographic Hardware and Embedded Systems, CHES 2002
13250
 *
13251
 * Algorithm:
13252
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13253
 *  1. t[1] = b mod m.
13254
 *  2. t[0] = 1
13255
 *  3. For i in (bits-1)...0
13256
 *   3.1. y = e[i]
13257
 *   3.2. t[2] = t[0] * t[1]
13258
 *   3.3. t[3] = t[y] ^ 2
13259
 *   3.4. t[y] = t[3], t[y^1] = t[2]
13260
 *  4. r = t[0]
13261
 *
13262
 * @param [in]  b     SP integer that is the base.
13263
 * @param [in]  e     SP integer that is the exponent.
13264
 * @param [in]  bits  Number of bits in exponent to use. May be greater than
13265
 *                    count of bits in e.
13266
 * @param [in]  m     SP integer that is the modulus.
13267
 * @param [out] r     SP integer to hold result.
13268
 *
13269
 * @return  MP_OKAY on success.
13270
 * @return  MP_MEM when dynamic memory allocation fails.
13271
 */
13272
static int _sp_exptmod_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    /* Set when the reduced base is zero and r has already been written. */
    int done = 0;
    int i;
    int k;
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);

    /* Get memory for the four temporaries. */
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
    if (err == MP_OKAY) {
        /* Each temporary can hold an unreduced product. */
        for (k = 0; k < 4; k++) {
            _sp_init_size(t[k], m->used * 2 + 1);
        }

        /* 1. t[1] = b mod m */
        if (_sp_cmp_abs(b, m) == MP_LT) {
            /* Base already below the modulus - use as is. */
            err = sp_copy(b, t[1]);
        }
        else {
            err = sp_mod(b, m, t[1]);
            /* Base is a multiple of the modulus: result is zero. */
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        /* 2. t[0] = 1 */
        _sp_set(t[0], 1);

        /* 3. For i in (bits-1)...0 */
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
            /* 3.1. y = e[i] */
            int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;

            /* 3.2. t[2] = t[0] * t[1] */
            err = sp_mulmod(t[0], t[1], m, t[2]);
            if (err == MP_OKAY) {
                /* 3.3. t[3] = t[y] ^ 2
                 *      Value to square is picked without branching on y.
                 */
#ifdef WC_NO_PTR_INT_CAST
                _sp_cond_copy(t[0], y^1, t[3], m->used);
                _sp_cond_copy(t[1], y,   t[3], m->used);
#else
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[y^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[y  ])),
                         t[3]);
#endif
                err = sp_sqrmod(t[3], m, t[3]);
                /* 3.4. t[y] = t[3], t[y^1] = t[2] */
                if (err == MP_OKAY) {
                    _sp_copy_2_ct(t[2], t[3], t[0], t[1], y, m->used);
                }
            }
        }

        /* 4. r = t[0] */
        if (err == MP_OKAY) {
            err = sp_copy(t[0], r);
        }
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
13342
13343
#endif /* WC_PROTECT_ENCRYPTED_MEM */
13344
13345
#endif
13346
13347
#if (defined(WOLFSSL_SP_MATH_ALL) && ((!defined(WOLFSSL_RSA_VERIFY_ONLY) && \
13348
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH))) || \
13349
    defined(OPENSSL_ALL)
13350
#ifndef WC_NO_HARDEN
13351
#if !defined(WC_NO_CACHE_RESISTANT)
13352
13353
#ifndef WC_PROTECT_ENCRYPTED_MEM
13354
13355
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13356
 * Process the exponent one bit at a time with base in Montgomery form.
13357
 * Is constant time and cache attack resistant.
13358
 *
13359
 * Algorithm:
13360
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13361
 *  1. t[0] = b mod m.
13362
 *  2. s = 0
13363
 *  3. t[0] = ToMont(t[0])
13364
 *  4. t[1] = t[0]
13365
 *  5. bm = t[0]
13366
 *  6. For i in (bits-1)...0
13367
 *   6.1. t[s] = t[s] ^ 2
13368
 *   6.2. y = e[i]
13369
 *   6.3  j = y & s
13370
 *   6.4  s = s | y
13371
 *   6.5. t[j] = t[j] * bm
13372
 *  7. t[1] = FromMont(t[1])
13373
 *  8. r = t[1]
13374
 *
13375
 * @param [in]  b     SP integer that is the base.
13376
 * @param [in]  e     SP integer that is the exponent.
13377
 * @param [in]  bits  Number of bits in exponent to use. May be greater than
13378
 *                    count of bits in e.
13379
 * @param [in]  m     SP integer that is the modulus.
13380
 * @param [out] r     SP integer to hold result.
13381
 *
13382
 * @return  MP_OKAY on success.
13383
 * @return  MP_MEM when dynamic memory allocation fails.
13384
 */
13385
static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    /* Set when the reduced base is zero and r has already been written. */
    int done = 0;
    int i;
    int k;
    /* 2. s = 0
     *    Selects the working value: t[0] (fake) until the first 1 bit of the
     *    exponent has been seen, t[1] (real) afterwards.
     */
    int s = 0;
    /* Montgomery multiplier used by the reductions. */
    sp_int_digit mp;
#ifndef WC_NO_PTR_INT_CAST
    /* Working value chosen by masking the addresses of t[0] and t[1]. */
    sp_int* sel = NULL;
#endif
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);

    /* Get memory for the four temporaries. */
    ALLOC_SP_INT_ARRAY(t, m->used * 2U + 1U, 4, err, NULL);
    if (err == MP_OKAY) {
        /* Each temporary can hold an unreduced product. */
        for (k = 0; k < 4; k++) {
            _sp_init_size(t[k], (sp_size_t)(m->used * 2 + 1));
        }

        /* 1. t[0] = b mod m */
        if (_sp_cmp_abs(b, m) == MP_LT) {
            /* Base already below the modulus - use as is. */
            _sp_copy(b, t[0]);
        }
        else {
            err = sp_mod(b, m, t[0]);
            /* Base is a multiple of the modulus: result is zero. */
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
        /* 3. t[0] = ToMont(t[0])
         *    Multiply the base by the Montgomery normalizer and reduce.
         */
        err = sp_mont_norm(t[1], m);
        if (err == MP_OKAY) {
            err = sp_mul(t[0], t[1], t[0]);
        }
        if (err == MP_OKAY) {
            /* t[0] = t[0] mod m, temporary size has to be bigger than t[0]. */
            err = _sp_div(t[0], m, NULL, t[0], t[0]->used + 1U);
        }
        if (err == MP_OKAY) {
            /* 4. t[1] = t[0]
             *    Real working value starts as the base.
             */
            _sp_copy(t[0], t[1]);
            /* 5. bm = t[0]
             *    Keep the Montgomery form base for the multiplications.
             */
            _sp_copy(t[0], t[2]);
        }

        /* 6. For i in (bits-1)...0 */
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
            /* 6.1. t[s] = t[s] ^ 2
             *      Copy selected value to scratch, square, copy back.
             */
#ifdef WC_NO_PTR_INT_CAST
            _sp_cond_copy(t[0], s^1, t[3], m->used);
            _sp_cond_copy(t[1], s,   t[3], m->used);
#else
            sel = (sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
                            ((size_t)t[1] & sp_off_on_addr[s  ]));
            _sp_copy(sel, t[3]);
#endif
            err = sp_sqr(t[3], t[3]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[3], m, mp, 0);
            }
#ifdef WC_NO_PTR_INT_CAST
            _sp_cond_copy(t[3], s^1, t[0], m->used);
            _sp_cond_copy(t[3], s,   t[1], m->used);
#else
            _sp_copy(t[3], sel);
#endif

            if (err == MP_OKAY) {
                /* 6.2. y = e[i] */
                int y = (int)((e->dp[i >> SP_WORD_SHIFT] >>
                               (i & (int)SP_WORD_MASK)) & 1);
                /* 6.3. j = y & s */
                int j = y & s;

                /* 6.4. s = s | y */
                s |= y;

                /* 6.5. t[j] = t[j] * bm
                 *      Copy selected value to scratch, multiply, copy back.
                 */
#ifdef WC_NO_PTR_INT_CAST
                _sp_cond_copy(t[0], j^1, t[3], m->used);
                _sp_cond_copy(t[1], j,   t[3], m->used);
#else
                sel = (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                ((size_t)t[1] & sp_off_on_addr[j  ]));
                _sp_copy(sel, t[3]);
#endif
                err = sp_mul(t[3], t[2], t[3]);
                if (err == MP_OKAY) {
                    err = _sp_mont_red(t[3], m, mp, 0);
                }
#ifdef WC_NO_PTR_INT_CAST
                _sp_cond_copy(t[3], j^1, t[0], m->used);
                _sp_cond_copy(t[3], j,   t[1], m->used);
#else
                _sp_copy(t[3], sel);
#endif
            }
        }

        if (err == MP_OKAY) {
            /* 7. t[1] = FromMont(t[1])
             *    Reduction implementation returns number to range: 0..m-1.
             */
            err = _sp_mont_red(t[1], m, mp, 0);
        }
        if (err == MP_OKAY) {
            /* 8. r = t[1] */
            _sp_copy(t[1], r);
        }
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
13514
13515
#else
13516
13517
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13518
 * Process the exponent one bit at a time with base in Montgomery form.
13519
 * Is constant time and cache attack resistant.
13520
 *
13521
 * Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
13522
 * Cryptographic Hardware and Embedded Systems, CHES 2002
13523
 *
13524
 * Algorithm:
13525
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13526
 *  1. t[1] = b mod m.
13527
 *  2. t[0] = ToMont(1)
13528
 *  3. t[1] = ToMont(t[1])
13529
 *  4. For i in (bits-1)...0
13530
 *   4.1. y = e[i]
13531
 *   4.2. t[2] = t[0] * t[1]
13532
 *   4.3. t[3] = t[y] ^ 2
13533
 *   4.4. t[y] = t[3], t[y^1] = t[2]
13534
 *  5. t[0] = FromMont(t[0])
13535
 *  6. r = t[0]
13536
 *
13537
 * @param [in]  b     SP integer that is the base.
13538
 * @param [in]  e     SP integer that is the exponent.
13539
 * @param [in]  bits  Number of bits in exponent to use. May be greater than
13540
 *                    count of bits in e.
13541
 * @param [in]  m     SP integer that is the modulus.
13542
 * @param [out] r     SP integer to hold result.
13543
 *
13544
 * @return  MP_OKAY on success.
13545
 * @return  MP_MEM when dynamic memory allocation fails.
13546
 */
13547
static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    /* Set when the reduced base is zero and r has already been written. */
    int done = 0;
    int i;
    int k;
    /* Montgomery multiplier used by the reductions. */
    sp_int_digit mp;
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);

    /* Get memory for the four temporaries. */
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
    if (err == MP_OKAY) {
        /* Each temporary can hold an unreduced product. */
        for (k = 0; k < 4; k++) {
            _sp_init_size(t[k], m->used * 2 + 1);
        }

        /* 1. t[1] = b mod m */
        if (_sp_cmp_abs(b, m) == MP_LT) {
            /* Base already below the modulus - use as is. */
            err = sp_copy(b, t[1]);
        }
        else {
            err = sp_mod(b, m, t[1]);
            /* Base is a multiple of the modulus: result is zero. */
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
        /* 2. t[0] = ToMont(1)
         *    Calculate 1 in Montgomery form.
         */
        err = sp_mont_norm(t[0], m);
        if (err == MP_OKAY) {
            /* 3. t[1] = ToMont(t[1])
             *    Convert base to Montgomery form.
             */
            err = sp_mulmod(t[1], t[0], m, t[1]);
        }

        /* 4. For i in (bits-1)...0 */
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
            /* 4.1. y = e[i] */
            int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;

            /* 4.2. t[2] = t[0] * t[1] */
            err = sp_mul(t[0], t[1], t[2]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[2], m, mp, 0);
            }
            if (err == MP_OKAY) {
                /* 4.3. t[3] = t[y] ^ 2
                 *      Value to square is picked without branching on y.
                 */
#ifdef WC_NO_PTR_INT_CAST
                _sp_cond_copy(t[0], y^1, t[3], m->used);
                _sp_cond_copy(t[1], y,   t[3], m->used);
#else
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[y^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[y  ])),
                         t[3]);
#endif
                err = sp_sqr(t[3], t[3]);
                if (err == MP_OKAY) {
                    err = _sp_mont_red(t[3], m, mp, 0);
                }
                /* 4.4. t[y] = t[3], t[y^1] = t[2] */
                if (err == MP_OKAY) {
                    _sp_copy_2_ct(t[2], t[3], t[0], t[1], y, m->used);
                }
            }
        }

        if (err == MP_OKAY) {
            /* 5. t[0] = FromMont(t[0])
             *    Reduction implementation returns number to range: 0..m-1.
             */
            err = _sp_mont_red(t[0], m, mp, 0);
        }
        if (err == MP_OKAY) {
            /* 6. r = t[0] */
            err = sp_copy(t[0], r);
        }
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
13640
13641
#endif /* WC_PROTECT_ENCRYPTED_MEM */
13642
13643
#else
13644
13645
#ifdef SP_ALLOC
13646
#define SP_ALLOC_PREDEFINED
13647
#endif
13648
/* Always allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */
13649
#define SP_ALLOC
13650
13651
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13652
 * Creates a window of precalculated exponents with base in Montgomery form.
13653
 * Is constant time but NOT cache attack resistant.
13654
 *
13655
 * Algorithm:
13656
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13657
 *  w: window size based on bits.
13658
 *  1. t[1] = b mod m.
13659
 *  2. t[0] = MontNorm(m) = ToMont(1)
13660
 *  3. t[1] = ToMont(t[1])
13661
 *  4. For i in 2..(2 ^ w) - 1
13662
 *   4.1 if i[0] == 0 then t[i] = t[i/2] ^ 2
13663
 *   4.2 if i[0] == 1 then t[i] = t[i-1] * t[1]
13664
 *  5. cb = w * (bits / w)
13665
 *  6. tr = t[e / (2 ^ cb)]
13666
 *  7. For i in cb..w
13667
 *   7.1. y = e[(i-1)..(i-w)]
13668
 *   7.2. tr = tr ^ (2 ^ w)
13669
 *   7.3. tr = tr * t[y]
13670
 *  8. tr = FromMont(tr)
13671
 *  9. r = tr
13672
 *
13673
 * @param [in]  b     SP integer that is the base.
13674
 * @param [in]  e     SP integer that is the exponent.
13675
 * @param [in]  bits  Number of bits in exponent to use. May be greater than
13676
 *                    count of bits in e.
13677
 * @param [in]  m     SP integer that is the modulus.
13678
 * @param [out] r     SP integer to hold result.
13679
 *
13680
 * @return  MP_OKAY on success.
13681
 * @return  MP_MEM when dynamic memory allocation fails.
13682
 */
13683
static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
13684
    const sp_int* m, sp_int* r)
13685
{
13686
    int i;
13687
    int c;
13688
    int y;
13689
    int winBits;
13690
    int preCnt;
13691
    int err = MP_OKAY;
13692
    int done = 0;
13693
    sp_int_digit mask;
13694
    sp_int* tr = NULL;
13695
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 6) + 1);
13696
13697
    /* Window bits based on number of pre-calculations versus number of loop
13698
     * calculations.
13699
     * Exponents for RSA and DH will result in 6-bit windows.
13700
     */
13701
    if (bits > 450) {
13702
        winBits = 6;
13703
    }
13704
    else if (bits <= 21) {
13705
        winBits = 1;
13706
    }
13707
    else if (bits <= 36) {
13708
        winBits = 3;
13709
    }
13710
    else if (bits <= 140) {
13711
        winBits = 4;
13712
    }
13713
    else {
13714
        winBits = 5;
13715
    }
13716
    /* An entry for each possible 0..2^winBits-1 value. */
13717
    preCnt = 1 << winBits;
13718
    /* Mask for calculating index into pre-computed table. */
13719
    mask = preCnt - 1;
13720
13721
    /* Allocate sp_ints for:
13722
     *  - pre-computation table
13723
     *  - temporary result
13724
     */
13725
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 1, err, NULL);
13726
    if (err == MP_OKAY) {
13727
        /* Set variable to use allocate memory. */
13728
        tr = t[preCnt];
13729
13730
        /* Initialize all allocated. */
13731
        for (i = 0; i < preCnt; i++) {
13732
            _sp_init_size(t[i], m->used * 2 + 1);
13733
        }
13734
        _sp_init_size(tr, m->used * 2 + 1);
13735
13736
        /* 1. t[1] = b mod m. */
13737
        if (_sp_cmp_abs(b, m) != MP_LT) {
13738
            err = sp_mod(b, m, t[1]);
13739
            /* Handle base == modulus. */
13740
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
13741
                _sp_set(r, 0);
13742
                done = 1;
13743
            }
13744
        }
13745
        else {
13746
            /* Copy base into entry of table to contain b^1. */
13747
            _sp_copy(b, t[1]);
13748
        }
13749
    }
13750
13751
    if ((!done) && (err == MP_OKAY)) {
13752
        sp_int_digit mp;
13753
        sp_int_digit n;
13754
13755
        /* Calculate Montgomery multiplier for reduction. */
13756
        _sp_mont_setup(m, &mp);
13757
        /* 2. t[0] = MontNorm(m) = ToMont(1) */
13758
        err = sp_mont_norm(t[0], m);
13759
        if (err == MP_OKAY) {
13760
            /* 3. t[1] = ToMont(t[1]) */
13761
            err = sp_mul(t[1], t[0], t[1]);
13762
        }
13763
        if (err == MP_OKAY) {
13764
            /* t[1] = t[1] mod m, temporary size has to be bigger than t[1]. */
13765
            err = _sp_div(t[1], m, NULL, t[1], t[1]->used + 1);
13766
        }
13767
13768
        /* 4. For i in 2..(2 ^ w) - 1 */
13769
        for (i = 2; (i < preCnt) && (err == MP_OKAY); i++) {
13770
            /* 4.1 if i[0] == 0 then t[i] = t[i/2] ^ 2 */
13771
            if ((i & 1) == 0) {
13772
                err = sp_sqr(t[i/2], t[i]);
13773
            }
13774
            /* 4.2 if i[0] == 1 then t[i] = t[i-1] * t[1] */
13775
            else {
13776
                err = sp_mul(t[i-1], t[1], t[i]);
13777
            }
13778
            /* Montgomery reduce square or multiplication result. */
13779
            if (err == MP_OKAY) {
13780
                err = _sp_mont_red(t[i], m, mp, 0);
13781
            }
13782
        }
13783
13784
        if (err == MP_OKAY) {
13785
            /* 5. cb = w * (bits / w) */
13786
            i = (bits - 1) >> SP_WORD_SHIFT;
13787
            n = e->dp[i--];
13788
            /* Find top bit index in last word. */
13789
            c = bits & (SP_WORD_SIZE - 1);
13790
            if (c == 0) {
13791
                c = SP_WORD_SIZE;
13792
            }
13793
            /* Use as many bits from top to make remaining a multiple of window
13794
             * size.
13795
             */
13796
            if ((bits % winBits) != 0) {
13797
                c -= bits % winBits;
13798
            }
13799
            else {
13800
                c -= winBits;
13801
            }
13802
13803
            /* 6. tr = t[e / (2 ^ cb)] */
13804
            y = (int)(n >> c);
13805
            n <<= SP_WORD_SIZE - c;
13806
            /* Copy table value for first window. */
13807
            _sp_copy(t[y], tr);
13808
13809
            /* 7. For i in cb..w */
13810
            for (; (i >= 0) || (c >= winBits); ) {
13811
                int j;
13812
13813
                /* 7.1. y = e[(i-1)..(i-w)] */
13814
                if (c == 0) {
13815
                    /* Bits up to end of digit */
13816
                    n = e->dp[i--];
13817
                    y = (int)(n >> (SP_WORD_SIZE - winBits));
13818
                    n <<= winBits;
13819
                    c = SP_WORD_SIZE - winBits;
13820
                }
13821
                else if (c < winBits) {
13822
                    /* Bits to end of digit and part of next */
13823
                    y = (int)(n >> (SP_WORD_SIZE - winBits));
13824
                    n = e->dp[i--];
13825
                    c = winBits - c;
13826
                    y |= (int)(n >> (SP_WORD_SIZE - c));
13827
                    n <<= c;
13828
                    c = SP_WORD_SIZE - c;
13829
                }
13830
                else {
13831
                    /* Bits from middle of digit */
13832
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
13833
                    n <<= winBits;
13834
                    c -= winBits;
13835
                }
13836
13837
                /* 7.2. tr = tr ^ (2 ^ w) */
13838
                for (j = 0; (j < winBits) && (err == MP_OKAY); j++) {
13839
                    err = sp_sqr(tr, tr);
13840
                    if (err == MP_OKAY) {
13841
                        err = _sp_mont_red(tr, m, mp, 0);
13842
                    }
13843
                }
13844
13845
                /* 7.3. tr = tr * t[y] */
13846
                if (err == MP_OKAY) {
13847
                    err = sp_mul(tr, t[y], tr);
13848
                }
13849
                if (err == MP_OKAY) {
13850
                    err = _sp_mont_red(tr, m, mp, 0);
13851
                }
13852
            }
13853
        }
13854
13855
        if (err == MP_OKAY) {
13856
            /* 8. tr = FromMont(tr) */
13857
            err = _sp_mont_red(tr, m, mp, 0);
13858
            /* Reduction implementation returns number to range: 0..m-1. */
13859
        }
13860
    }
13861
    if ((!done) && (err == MP_OKAY)) {
13862
        /* 9. r = tr */
13863
        _sp_copy(tr, r);
13864
    }
13865
13866
    FREE_SP_INT_ARRAY(t, NULL);
13867
    return err;
13868
}
13869
13870
#ifndef SP_ALLOC_PREDEFINED
13871
#undef SP_ALLOC
13872
#undef SP_ALLOC_PREDEFINED
13873
#endif
13874
13875
#endif /* !WC_NO_CACHE_RESISTANT */
13876
#endif /* !WC_NO_HARDEN */
13877
13878
/* w = Log2(SP_WORD_SIZE) - 1 */
13879
#if SP_WORD_SIZE == 8
13880
    #define EXP2_WINSIZE    2
13881
#elif SP_WORD_SIZE == 16
13882
    #define EXP2_WINSIZE    3
13883
#elif SP_WORD_SIZE == 32
13884
    #define EXP2_WINSIZE    4
13885
#elif SP_WORD_SIZE == 64
13886
0
    #define EXP2_WINSIZE    5
13887
#else
13888
    #error "sp_exptmod_base_2: Unexpected SP_WORD_SIZE"
13889
#endif
13890
/* Mask is all bits in window set. */
13891
0
#define EXP2_MASK           ((1 << EXP2_WINSIZE) - 1)
13892
13893
/* Internal. Exponentiates 2 to the power of e modulo m into r: r = 2 ^ e mod m
13894
 * Is constant time and cache attack resistant.
13895
 *
13896
 * Calculates value to make mod operations constant time except when
13897
 * WC_NO_HARDEN defined or modulus fits in one word.
13898
 *
13899
 * Algorithm:
13900
 *  b: base, e: exponent, m: modulus, r: result, digits: #digits to use
13901
 *  w: window size based on #bits in word.
13902
 *  1. if Words(m) > 1 then tr = MontNorm(m) = ToMont(1)
13903
 *     else                 tr = 1
13904
 *  2. if Words(m) > 1 and HARDEN then a = m * (2 ^ (2^w))
13905
 *     else                            a = 0
13906
 *  3. cb = w * ((digits * SP_WORD_SIZE) / w)
13907
 *  4. y = e / (2 ^ cb)
13908
 *  5. tr = (tr * (2 ^ y) + a) mod m
13909
 *  6. For i in cb..w
13910
 *   6.1. y = e[(i-1)..(i-w)]
13911
 *   6.2. tr = tr ^ (2 ^ w)
13912
 *   6.3. tr = ((tr * (2 ^ y) + a) mod m
13913
 *  7. if Words(m) > 1 then tr = FromMont(tr)
13914
 *  8. r = tr
13915
 *
13916
 * @param [in]  e       SP integer that is the exponent.
13917
 * @param [in]  digits  Number of digits in exponent to use. May be greater than
13918
 *                      count of digits in e.
13919
 * @param [in]  m       SP integer that is the modulus.
13920
 * @param [out] r       SP integer to hold result.
13921
 *
13922
 * @return  MP_OKAY on success.
13923
 * @return  MP_MEM when dynamic memory allocation fails.
13924
 */
13925
static int _sp_exptmod_base_2(const sp_int* e, int digits, const sp_int* m,
13926
    sp_int* r)
13927
0
{
13928
0
    int i = 0;
13929
0
    int c = 0;
13930
0
    int y;
13931
0
    int err = MP_OKAY;
13932
0
    sp_int_digit mp = 0;
13933
0
    sp_int_digit n = 0;
13934
0
#ifndef WC_NO_HARDEN
13935
0
    sp_int* a = NULL;
13936
0
    sp_int* tr = NULL;
13937
0
    DECL_SP_INT_ARRAY(d, m->used * 2 + 1, 2);
13938
#else
13939
    DECL_SP_INT(tr, m->used * 2 + 1);
13940
#endif
13941
0
    int useMont = (m->used > 1);
13942
13943
#if 0
13944
    sp_print_int(2, "a");
13945
    sp_print(e, "b");
13946
    sp_print(m, "m");
13947
#endif
13948
13949
0
#ifndef WC_NO_HARDEN
13950
    /* Allocate sp_ints for:
13951
     *  - constant time add value for mod operation
13952
     *  - temporary result
13953
     */
13954
0
    ALLOC_SP_INT_ARRAY(d, m->used * 2U + 1U, 2, err, NULL);
13955
#else
13956
    /* Allocate sp_int for temporary result. */
13957
    ALLOC_SP_INT(tr, m->used * 2U + 1U, err, NULL);
13958
#endif
13959
0
    if (err == MP_OKAY) {
13960
0
    #ifndef WC_NO_HARDEN
13961
0
        a  = d[0];
13962
0
        tr = d[1];
13963
13964
0
        _sp_init_size(a, (sp_size_t)(m->used * 2 + 1));
13965
0
    #endif
13966
0
        _sp_init_size(tr, (sp_size_t)(m->used * 2 + 1));
13967
13968
0
    }
13969
13970
0
    if ((err == MP_OKAY) && useMont) {
13971
        /* Calculate Montgomery multiplier for reduction. */
13972
0
        _sp_mont_setup(m, &mp);
13973
0
    }
13974
0
    if (err == MP_OKAY) {
13975
        /* 1. if Words(m) > 1 then tr = MontNorm(m) = ToMont(1)
13976
         *    else                 tr = 1
13977
         */
13978
0
        if (useMont) {
13979
            /* Calculate Montgomery normalizer for modulus - 1 in Montgomery
13980
             * form.
13981
             */
13982
0
            err = sp_mont_norm(tr, m);
13983
0
        }
13984
0
        else {
13985
             /* For single word modulus don't use Montgomery form. */
13986
0
            err = sp_set(tr, 1);
13987
0
        }
13988
0
    }
13989
    /* 2. if Words(m) > 1 and HARDEN then a = m * (2 ^ (2^w))
13990
     *    else                            a = 0
13991
     */
13992
0
#ifndef WC_NO_HARDEN
13993
0
    if ((err == MP_OKAY) && useMont) {
13994
0
        err = sp_mul_2d(m, 1 << EXP2_WINSIZE, a);
13995
0
    }
13996
0
#endif
13997
13998
0
    if (err == MP_OKAY) {
13999
        /*  3. cb = w * ((digits * SP_WORD_SIZE) / w) */
14000
0
        i = digits - 1;
14001
0
        n = e->dp[i--];
14002
0
        c = SP_WORD_SIZE;
14003
0
    #if EXP2_WINSIZE != 1
14004
0
        c -= (digits * SP_WORD_SIZE) % EXP2_WINSIZE;
14005
0
        if (c != SP_WORD_SIZE) {
14006
            /* 4. y = e / (2 ^ cb) */
14007
0
            y = (int)(n >> c);
14008
0
            n <<= SP_WORD_SIZE - c;
14009
0
        }
14010
0
        else
14011
0
    #endif
14012
0
        {
14013
            /* 4. y = e / (2 ^ cb) */
14014
0
            y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) & EXP2_MASK);
14015
0
            n <<= EXP2_WINSIZE;
14016
0
            c -= EXP2_WINSIZE;
14017
0
        }
14018
14019
        /* 5. tr = (tr * (2 ^ y) + a) mod m */
14020
0
        err = sp_mul_2d(tr, y, tr);
14021
0
    }
14022
0
#ifndef WC_NO_HARDEN
14023
0
    if ((err == MP_OKAY) && useMont) {
14024
        /* Add value to make mod operation constant time. */
14025
0
        err = sp_add(tr, a, tr);
14026
0
    }
14027
0
#endif
14028
0
    if (err == MP_OKAY) {
14029
0
        err = sp_mod(tr, m, tr);
14030
0
    }
14031
    /* 6. For i in cb..w */
14032
0
    for (; (err == MP_OKAY) && ((i >= 0) || (c >= EXP2_WINSIZE)); ) {
14033
0
        int j;
14034
14035
        /* 6.1. y = e[(i-1)..(i-w)] */
14036
0
        if (c == 0) {
14037
            /* Bits from next digit. */
14038
0
            n = e->dp[i--];
14039
0
            y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
14040
0
            n <<= EXP2_WINSIZE;
14041
0
            c = SP_WORD_SIZE - EXP2_WINSIZE;
14042
0
        }
14043
0
    #if (EXP2_WINSIZE != 1) && (EXP2_WINSIZE != 2) && (EXP2_WINSIZE != 4)
14044
0
        else if (c < EXP2_WINSIZE) {
14045
            /* Bits to end of digit and part of next */
14046
0
            y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
14047
0
            n = e->dp[i--];
14048
0
            c = EXP2_WINSIZE - c;
14049
0
            y |= (int)(n >> (SP_WORD_SIZE - c));
14050
0
            n <<= c;
14051
0
            c = SP_WORD_SIZE - c;
14052
0
        }
14053
0
    #endif
14054
0
        else {
14055
            /* Bits from middle of digit */
14056
0
            y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) & EXP2_MASK);
14057
0
            n <<= EXP2_WINSIZE;
14058
0
            c -= EXP2_WINSIZE;
14059
0
        }
14060
14061
        /* 6.2. tr = tr ^ (2 ^ w) */
14062
0
        for (j = 0; (j < EXP2_WINSIZE) && (err == MP_OKAY); j++) {
14063
0
            err = sp_sqr(tr, tr);
14064
0
            if (err == MP_OKAY) {
14065
0
                if (useMont) {
14066
0
                    err = _sp_mont_red(tr, m, mp, 0);
14067
0
                }
14068
0
                else {
14069
0
                    err = sp_mod(tr, m, tr);
14070
0
                }
14071
0
            }
14072
0
        }
14073
14074
        /* 6.3. tr = ((tr * (2 ^ y) + a) mod m */
14075
0
        if (err == MP_OKAY) {
14076
0
            err = sp_mul_2d(tr, y, tr);
14077
0
        }
14078
0
    #ifndef WC_NO_HARDEN
14079
0
        if ((err == MP_OKAY) && useMont) {
14080
            /* Add value to make mod operation constant time. */
14081
0
            err = sp_add(tr, a, tr);
14082
0
        }
14083
0
    #endif
14084
0
        if (err == MP_OKAY) {
14085
            /* Reduce current result by modulus. */
14086
0
            err = sp_mod(tr, m, tr);
14087
0
        }
14088
0
    }
14089
14090
    /* 7. if Words(m) > 1 then tr = FromMont(tr) */
14091
0
    if ((err == MP_OKAY) && useMont) {
14092
0
        err = _sp_mont_red(tr, m, mp, 0);
14093
        /* Reduction implementation returns number to range: 0..m-1. */
14094
0
    }
14095
0
    if (err == MP_OKAY) {
14096
        /* 8. r = tr */
14097
0
        _sp_copy(tr, r);
14098
0
    }
14099
14100
#if 0
14101
    sp_print(r, "rme");
14102
#endif
14103
14104
0
#ifndef WC_NO_HARDEN
14105
0
    FREE_SP_INT_ARRAY(d, NULL);
14106
#else
14107
    FREE_SP_INT(tr, NULL);
14108
#endif
14109
0
    return err;
14110
0
}
14111
#endif
14112
14113
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
14114
    !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || \
14115
    defined(OPENSSL_ALL)
14116
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
14117
 *
14118
 * Error returned when parameters r == e or r == m and base >= modulus.
14119
 *
14120
 * @param [in]  b       SP integer that is the base.
14121
 * @param [in]  e       SP integer that is the exponent.
14122
 * @param [in]  digits  Number of digits in exponent to use. May be greater
14123
 *                      than count of digits in e.
14124
 * @param [in]  m       SP integer that is the modulus.
14125
 * @param [out] r       SP integer to hold result.
14126
 *
14127
 * @return  MP_OKAY on success.
14128
 * @return  MP_VAL when b, e, m or r is NULL, digits is negative, or m <= 0 or
14129
 *          e is negative.
14130
 * @return  MP_MEM when dynamic memory allocation fails.
14131
 */
14132
int sp_exptmod_ex(const sp_int* b, const sp_int* e, int digits, const sp_int* m,
14133
    sp_int* r)
14134
0
{
14135
0
    int err = MP_OKAY;
14136
0
    int done = 0;
14137
0
    int mBits = sp_count_bits(m);
14138
0
    int bBits = sp_count_bits(b);
14139
0
    int eBits = sp_count_bits(e);
14140
14141
0
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL) ||
14142
0
             (digits < 0)) {
14143
0
        err = MP_VAL;
14144
0
    }
14145
    /* Ensure m is not too big. */
14146
0
    else if (m->used * 2 >= SP_INT_DIGITS) {
14147
0
        err = MP_VAL;
14148
0
    }
14149
14150
#if 0
14151
    if (err == MP_OKAY) {
14152
        sp_print(b, "a");
14153
        sp_print(e, "b");
14154
        sp_print(m, "m");
14155
    }
14156
#endif
14157
14158
    /* Check for invalid modulus. */
14159
0
    if ((err == MP_OKAY) && sp_iszero(m)) {
14160
0
        err = MP_VAL;
14161
0
    }
14162
#ifdef WOLFSSL_SP_INT_NEGATIVE
14163
    /* Check for unsupported negative values of exponent and modulus. */
14164
    if ((err == MP_OKAY) && ((e->sign == MP_NEG) || (m->sign == MP_NEG))) {
14165
        err = MP_VAL;
14166
    }
14167
#endif
14168
14169
    /* Check for degenerate cases. */
14170
0
    if ((err == MP_OKAY) && sp_isone(m)) {
14171
0
        _sp_set(r, 0);
14172
0
        done = 1;
14173
0
    }
14174
0
    if ((!done) && (err == MP_OKAY) && sp_iszero(e)) {
14175
0
        _sp_set(r, 1);
14176
0
        done = 1;
14177
0
    }
14178
14179
    /* Ensure base is less than modulus. */
14180
0
    if ((!done) && (err == MP_OKAY) && (_sp_cmp_abs(b, m) != MP_LT)) {
14181
0
        if ((r == e) || (r == m)) {
14182
0
            err = MP_VAL;
14183
0
        }
14184
0
        if (err == MP_OKAY) {
14185
0
            err = sp_mod(b, m, r);
14186
0
        }
14187
0
        if (err == MP_OKAY) {
14188
0
            b = r;
14189
0
        }
14190
0
    }
14191
    /* Check for degenerate case of base. */
14192
0
    if ((!done) && (err == MP_OKAY) && sp_iszero(b)) {
14193
0
        _sp_set(r, 0);
14194
0
        done = 1;
14195
0
    }
14196
14197
    /* Ensure SP integers have space for intermediate values. */
14198
0
    if ((!done) && (err == MP_OKAY) && (m->used * 2 >= r->size)) {
14199
0
        err = MP_VAL;
14200
0
    }
14201
14202
0
    if ((!done) && (err == MP_OKAY)) {
14203
        /* Use code optimized for specific sizes if possible */
14204
#if (defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)) && \
14205
    ((defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
14206
        defined(WOLFSSL_HAVE_SP_DH))
14207
    #ifndef WOLFSSL_SP_NO_2048
14208
        if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) &&
14209
                (eBits <= 1024)) {
14210
            err = sp_ModExp_1024(b, e, m, r);
14211
            done = 1;
14212
        }
14213
        else if ((mBits == 2048) && sp_isodd(m) && (bBits <= 2048) &&
14214
                 (eBits <= 2048)) {
14215
            err = sp_ModExp_2048(b, e, m, r);
14216
            done = 1;
14217
        }
14218
        else
14219
    #endif
14220
    #ifndef WOLFSSL_SP_NO_3072
14221
        if ((mBits == 1536) && sp_isodd(m) && (bBits <= 1536) &&
14222
                (eBits <= 1536)) {
14223
            err = sp_ModExp_1536(b, e, m, r);
14224
            done = 1;
14225
        }
14226
        else if ((mBits == 3072) && sp_isodd(m) && (bBits <= 3072) &&
14227
                 (eBits <= 3072)) {
14228
            err = sp_ModExp_3072(b, e, m, r);
14229
            done = 1;
14230
        }
14231
        else
14232
    #endif
14233
    #ifdef WOLFSSL_SP_4096
14234
        if ((mBits == 4096) && sp_isodd(m) && (bBits <= 4096) &&
14235
                (eBits <= 4096)) {
14236
            err = sp_ModExp_4096(b, e, m, r);
14237
            done = 1;
14238
        }
14239
        else
14240
    #endif
14241
#endif
14242
0
        {
14243
            /* SP does not support size. */
14244
0
        }
14245
0
    }
14246
0
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(OPENSSL_ALL)
14247
#if (defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_RSA_PUBLIC_ONLY)) && \
14248
    defined(NO_DH)
14249
    if ((!done) && (err == MP_OKAY)) {
14250
        /* Use non-constant time version - fastest. */
14251
        err = sp_exptmod_nct(b, e, m, r);
14252
    }
14253
#else
14254
0
#if defined(WOLFSSL_SP_MATH_ALL) || defined(OPENSSL_ALL)
14255
0
    if ((!done) && (err == MP_OKAY) && (b->used == 1) && (b->dp[0] == 2) &&
14256
0
         mp_isodd(m)) {
14257
        /* Use the generic base 2 implementation. */
14258
0
        err = _sp_exptmod_base_2(e, digits, m, r);
14259
0
    }
14260
0
    else if ((!done) && (err == MP_OKAY) && ((m->used > 1) && mp_isodd(m))) {
14261
0
    #ifndef WC_NO_HARDEN
14262
        /* Use constant time version hardened against timing attacks and
14263
         * cache attacks when WC_NO_CACHE_RESISTANT not defined. */
14264
0
        err = _sp_exptmod_mont_ex(b, e, digits * SP_WORD_SIZE, m, r);
14265
    #else
14266
        /* Use non-constant time version - fastest. */
14267
        err = sp_exptmod_nct(b, e, m, r);
14268
    #endif
14269
0
    }
14270
0
    else
14271
0
#endif /* WOLFSSL_SP_MATH_ALL || OPENSSL_ALL */
14272
0
    if ((!done) && (err == MP_OKAY)) {
14273
        /* Otherwise use the generic implementation hardened against
14274
         * timing and cache attacks. */
14275
0
        err = _sp_exptmod_ex(b, e, digits * SP_WORD_SIZE, m, r);
14276
0
    }
14277
0
#endif /* WOLFSSL_RSA_VERIFY_ONLY || WOLFSSL_RSA_PUBLIC_ONLY */
14278
#else
14279
    if ((!done) && (err == MP_OKAY)) {
14280
        err = MP_VAL;
14281
    }
14282
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
14283
14284
0
    (void)mBits;
14285
0
    (void)bBits;
14286
0
    (void)eBits;
14287
0
    (void)digits;
14288
14289
#if 0
14290
    if (err == MP_OKAY) {
14291
        sp_print(r, "rme");
14292
    }
14293
#endif
14294
0
    return err;
14295
0
}
14296
#endif
14297
14298
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
14299
    !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || \
14300
    defined(OPENSSL_ALL)
14301
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
14302
 *
14303
 * @param [in]  b  SP integer that is the base.
14304
 * @param [in]  e  SP integer that is the exponent.
14305
 * @param [in]  m  SP integer that is the modulus.
14306
 * @param [out] r  SP integer to hold result.
14307
 *
14308
 * @return  MP_OKAY on success.
14309
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
14310
 * @return  MP_MEM when dynamic memory allocation fails.
14311
 */
14312
int sp_exptmod(const sp_int* b, const sp_int* e, const sp_int* m, sp_int* r)
14313
0
{
14314
0
    int err = MP_OKAY;
14315
14316
    /* Validate parameters. */
14317
0
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
14318
0
        err = MP_VAL;
14319
0
    }
14320
0
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
14321
0
    if (err == MP_OKAY) {
14322
0
        err = sp_exptmod_ex(b, e, (int)e->used, m, r);
14323
0
    }
14324
0
    RESTORE_VECTOR_REGISTERS();
14325
0
    return err;
14326
0
}
14327
#endif
14328
14329
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
14330
#if defined(WOLFSSL_SP_FAST_NCT_EXPTMOD) || !defined(WOLFSSL_SP_SMALL)
14331
14332
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
14333
 * Creates a window of precalculated exponents with base in Montgomery form.
14334
 * Sliding window and is NOT constant time.
14335
 *
14336
 * n-bit window is: (b^(2^(n-1))*b^0)...(b^(2^(n-1))*b^(2^(n-1)-1))
14337
 * e.g. when n=6, b^32..b^63
14338
 * Algorithm:
14339
 *   1. Ensure base is less than modulus.
14340
 *   2. Convert base to Montgomery form
14341
 *   3. Set result to table entry for top window bits, or
14342
 *      if less than window bits in exponent, 1 in Montgomery form.
14343
 *   4. While at least window bits left:
14344
 *     4.1. Count number of bits and skip leading 0 bits unless less than window
14345
 *          bits left.
14346
 *     4.2. Montgomery square result for each leading 0 and window bits if bits
14347
 *          left.
14348
 *     4.3. Break if less than window bits left.
14349
 *     4.4. Get top window bits from exponent and drop.
14350
 *     4.5. Montgomery multiply result by table entry.
14351
 *   5. While bits left:
14352
 *     5.1. Montgomery square result
14353
 *     5.2. If exponent bit set
14354
 *       5.2.1. Montgomery multiply result by Montgomery form of base.
14355
 *   6. Convert result back from Montgomery form.
14356
 *
14357
 * @param [in]  b  SP integer that is the base.
14358
 * @param [in]  e  SP integer that is the exponent.
14359
 * @param [in]  m  SP integer that is the modulus.
14360
 * @param [out] r  SP integer to hold result.
14361
 *
14362
 * @return  MP_OKAY on success.
14363
 * @return  MP_MEM when dynamic memory allocation fails.
14364
 */
14365
static int _sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m,
    sp_int* r)
{
    int err = MP_OKAY;
    int done = 0;
    int i = 0;
    int bits;
    int winBits;
    int preCnt;
    /* Running result, kept in Montgomery form. */
    sp_int* acc = NULL;
    /* Base, reduced modulo m and then converted to Montgomery form. */
    sp_int* mb = NULL;
    /* Widest window is 6 bits: (1 << (6 - 1)) table entries, plus two more
     * for the running result and the Montgomery base. */
#ifndef WOLFSSL_SP_NO_MALLOC
    DECL_DYN_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);
#else
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);
#endif

    bits = sp_count_bits(e);

    /* Choose the window width from the exponent length: wider windows cost
     * more pre-computation but need fewer multiplications in the main loop.
     * Exponents for RSA and DH will result in 6-bit windows.
     * Note: for 4096-bit values, 7-bit window is slightly better.
     */
    if (bits <= 21) {
        winBits = 2;
    }
    else if (bits <= 36) {
        winBits = 3;
    }
    else if (bits <= 140) {
        winBits = 4;
    }
    else if (bits <= 450) {
        winBits = 5;
    }
    else {
        winBits = 6;
    }
    /* Every table entry has its top window bit set, halving the table. */
    preCnt = 1 << (winBits - 1);

    /* Get memory for the table, the running result and the base. */
#ifndef WOLFSSL_SP_NO_MALLOC
    ALLOC_DYN_SP_INT_ARRAY(t, m->used * 2U + 1U, (size_t)preCnt + 2, err, NULL);
#else
    ALLOC_SP_INT_ARRAY(t, m->used * 2U + 1U, (size_t)preCnt + 2, err, NULL);
#endif
    if (err == MP_OKAY) {
        const sp_size_t sz = (sp_size_t)(m->used * 2 + 1);

        /* The two entries after the table are the working values. */
        acc = t[preCnt + 0];
        mb = t[preCnt + 1];

        /* Initialize the table entries followed by the working values. */
        for (i = 0; i < preCnt + 2; i++) {
            _sp_init_size(t[i], sz);
        }

        /* 1. Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) == MP_LT) {
            /* Already in range - take the base as is. */
            _sp_copy(b, mb);
        }
        else {
            err = sp_mod(b, m, mb);
            /* A base that is a multiple of the modulus gives zero. */
            if ((err == MP_OKAY) && sp_iszero(mb)) {
                _sp_set(r, 0);
                done = 1;
            }
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        /* Window value read from the exponent. */
        int win = 0;
        /* Number of unread exponent bits still held in the current digit. */
        int left = 0;
        sp_int_digit mp;

        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
        /* Calculate Montgomery normalizer for modulus. */
        err = sp_mont_norm(t[0], m);
        if (err == MP_OKAY) {
            /* 2. Convert base to Montgomery form. */
            err = sp_mul(mb, t[0], mb);
        }
        if (err == MP_OKAY) {
            /* mb = mb mod m, temporary size has to be bigger than mb->used. */
            err = _sp_div(mb, m, NULL, mb, mb->used + 1U);
        }
        if (err == MP_OKAY) {
            /* First table entry starts as the Montgomery base. */
            _sp_copy(mb, t[0]);
        }
        /* Square up to b^(2^(winBits-1)): the smallest window value. */
        for (i = 1; (err == MP_OKAY) && (i < winBits); i++) {
            err = sp_sqr(t[0], t[0]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[0], m, mp, 0);
            }
        }
        /* Each further entry is the previous one times the base. */
        for (i = 1; (err == MP_OKAY) && (i < preCnt); i++) {
            err = sp_mul(t[i-1], mb, t[i]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[i], m, mp, 0);
            }
        }

        /* 3. Set result to table entry for top window bits, or
         *    if less than window bits in exponent, 1 in Montgomery form.
         */
        if (err == MP_OKAY) {
            /* Exponent digit being consumed, unread bits at the top. */
            sp_int_digit cur;
            /* Mask for calculating index into pre-computed table. */
            const sp_int_digit mask = (sp_int_digit)preCnt - 1;

            /* Load the most significant digit and count its valid bits. */
            i = (bits - 1) >> SP_WORD_SHIFT;
            cur = e->dp[i--];
            left = bits % SP_WORD_SIZE;
            if (left == 0) {
                left = SP_WORD_SIZE;
            }
            /* Put top bit at highest offset in digit. */
            cur <<= SP_WORD_SIZE - left;

            if (bits < winBits) {
                /* Too few bits for a window: start from 1 in Montgomery
                 * form. */
                err = sp_mont_norm(acc, m);
            }
            else {
                /* Top bit is set so the window indexes the table directly. */
                win = (int)((cur >> (SP_WORD_SIZE - winBits)) & mask);
                if (left < winBits) {
                    /* Window continues into the next digit down. */
                    cur = e->dp[i--];
                    left = winBits - left;
                    win |= (int)(cur >> (SP_WORD_SIZE - left));
                    cur <<= left;
                    left = SP_WORD_SIZE - left;
                }
                else {
                    /* Window lies wholly within this digit. */
                    cur <<= winBits;
                    left -= winBits;
                }
                _sp_copy(t[win], acc);
            }

            /* 4. While at least window bits left. */
            while ((err == MP_OKAY) && ((i >= 0) || (left >= winBits))) {
                /* Squarings owed before the next table multiply. */
                int sqrs = 0;
                /* Whether a full window remains after skipping zeros. */
                int more;

                /* 4.1. Count number of bits and skip leading 0 bits unless
                 *      less than window bits.
                 */
                do {
                    /* Refill from the next digit when this one is spent. */
                    if (left == 0) {
                        cur = e->dp[i--];
                        left = SP_WORD_SIZE;
                    }
                    /* Stop at the first set bit. */
                    if ((cur & ((sp_int_digit)1 << (SP_WORD_SIZE - 1))) != 0) {
                        break;
                    }

                    /* A zero bit costs one squaring. */
                    sqrs++;
                    cur <<= 1;
                    left--;
                }
                while ((i >= 0) || (left >= winBits));

                more = (i >= 0) || (left >= winBits);
                if (more) {
                    /* Add squares needed before using table entry. */
                    sqrs += winBits;
                }

                /* 4.2. Montgomery square result for each leading 0 and window
                 *      bits if bits left.
                 */
                for (; (err == MP_OKAY) && (sqrs > 0); sqrs--) {
                    err = sp_sqr(acc, acc);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(acc, m, mp, 0);
                    }
                }

                /* 4.3. Break if less than window bits left. */
                if ((err == MP_OKAY) && (!more)) {
                    break;
                }

                if (err == MP_OKAY) {
                    /* 4.4. Get top window bits from exponent and drop. */
                    if (left == 0) {
                        /* Start on a fresh digit. */
                        cur = e->dp[i--];
                        left = SP_WORD_SIZE;
                    }
                    win = (int)(cur >> (SP_WORD_SIZE - winBits));
                    if (left < winBits) {
                        /* Bits to end of digit and part of next. */
                        cur = e->dp[i--];
                        left = winBits - left;
                        win |= (int)(cur >> (SP_WORD_SIZE - left));
                        cur <<= left;
                        left = SP_WORD_SIZE - left;
                    }
                    else {
                        /* Bits from within the digit. */
                        cur <<= winBits;
                        left -= winBits;
                    }
                    win &= (int)mask;

                    /* 4.5. Montgomery multiply result by table entry. */
                    err = sp_mul(acc, t[win], acc);
                }
                if (err == MP_OKAY) {
                    err = _sp_mont_red(acc, m, mp, 0);
                }
            }

            /* Finished multiplying in table entries. */
            if ((err == MP_OKAY) && (left > 0)) {
                /* Fewer than window bits remain and they all sit in the
                 * lowest digit. Table entries have the top bit set, so
                 * process the bits one at a time. */
                cur = e->dp[0];
                /*  5. While bits left: */
                for (--left; (err == MP_OKAY) && (left >= 0); left--) {
                    /* 5.1. Montgomery square result */
                    err = sp_sqr(acc, acc);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(acc, m, mp, 0);
                    }
                    /* 5.2. If exponent bit set */
                    if ((err == MP_OKAY) && ((cur >> left) & 1)) {
                        /* 5.2.1. Montgomery multiply result by Montgomery form
                         * of base.
                         */
                        err = sp_mul(acc, mb, acc);
                        if (err == MP_OKAY) {
                            err = _sp_mont_red(acc, m, mp, 0);
                        }
                    }
                }
            }
        }

        if (err == MP_OKAY) {
            /* 6. Convert result back from Montgomery form.
             * Reduction implementation returns number to range: 0..m-1. */
            err = _sp_mont_red(acc, m, mp, 0);
        }
        if (err == MP_OKAY) {
            /* Hand the result to the caller only on success. */
            _sp_copy(acc, r);
        }
    }

#ifndef WOLFSSL_SP_NO_MALLOC
    FREE_DYN_SP_INT_ARRAY(t, NULL);
#else
    FREE_SP_INT_ARRAY(t, NULL);
#endif
    return err;
}
14651
14652
#else
14653
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
14654
 * Non-constant time implementation.
14655
 *
14656
 * Algorithm:
14657
 *   1. Convert base to Montgomery form
14658
 *   2. Set result to base (assumes exponent is not zero)
14659
 *   3. For each bit in exponent starting at second highest
14660
 *     3.1. Montgomery square result
14661
 *     3.2. If exponent bit set
14662
 *       3.2.1. Montgomery multiply result by Montgomery form of base.
14663
 *   4. Convert result back from Montgomery form.
14664
 *
14665
 * @param [in]  b  SP integer that is the base.
14666
 * @param [in]  e  SP integer that is the exponent.
14667
 * @param [in]  m  SP integer that is the modulus.
14668
 * @param [out] r  SP integer to hold result.
14669
 *
14670
 * @return  MP_OKAY on success.
14671
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
14672
 * @return  MP_MEM when dynamic memory allocation fails.
14673
 */
14674
static int _sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m,
14675
    sp_int* r)
14676
{
14677
    int i;
14678
    int err = MP_OKAY;
14679
    int done = 0;
14680
    int y = 0;
14681
    int bits = sp_count_bits(e);
14682
    sp_int_digit mp;
14683
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 2);
14684
14685
    /* Allocate memory for:
14686
     *  - Montgomery form of base
14687
     *  - Temporary result (in case r is same var as another parameter). */
14688
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 2, err, NULL);
14689
    if (err == MP_OKAY) {
14690
        _sp_init_size(t[0], m->used * 2 + 1);
14691
        _sp_init_size(t[1], m->used * 2 + 1);
14692
14693
        /* Ensure base is less than modulus and copy into temp. */
14694
        if (_sp_cmp_abs(b, m) != MP_LT) {
14695
            err = sp_mod(b, m, t[0]);
14696
            /* Handle base == modulus. */
14697
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
14698
                _sp_set(r, 0);
14699
                done = 1;
14700
            }
14701
        }
14702
        else {
14703
            /* Copy base into temp. */
14704
            _sp_copy(b, t[0]);
14705
        }
14706
    }
14707
14708
    if ((!done) && (err == MP_OKAY)) {
14709
        /* Calculate Montgomery multiplier for reduction. */
14710
        _sp_mont_setup(m, &mp);
14711
        /* Calculate Montgomery normalizer for modulus. */
14712
        err = sp_mont_norm(t[1], m);
14713
        if (err == MP_OKAY) {
14714
            /* 1. Convert base to Montgomery form. */
14715
            err = sp_mul(t[0], t[1], t[0]);
14716
        }
14717
        if (err == MP_OKAY) {
14718
            /* t[0] = t[0] mod m, temporary size has to be bigger than t[0]. */
14719
            err = _sp_div(t[0], m, NULL, t[0], t[0]->used + 1);
14720
        }
14721
        if (err == MP_OKAY) {
14722
            /* 2. Result starts as Montgomery form of base (assuming e > 0). */
14723
            _sp_copy(t[0], t[1]);
14724
        }
14725
14726
        /* 3. For each bit in exponent starting at second highest. */
14727
        for (i = bits - 2; (err == MP_OKAY) && (i >= 0); i--) {
14728
            /* 3.1. Montgomery square result. */
14729
            err = sp_sqr(t[0], t[0]);
14730
            if (err == MP_OKAY) {
14731
                err = _sp_mont_red(t[0], m, mp, 0);
14732
            }
14733
            if (err == MP_OKAY) {
14734
                /* Get bit and index i. */
14735
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
14736
                /* 3.2. If exponent bit set */
14737
                if (y != 0) {
14738
                    /* 3.2.1. Montgomery multiply result by Mont of base. */
14739
                    err = sp_mul(t[0], t[1], t[0]);
14740
                    if (err == MP_OKAY) {
14741
                        err = _sp_mont_red(t[0], m, mp, 0);
14742
                    }
14743
                }
14744
            }
14745
        }
14746
        if (err == MP_OKAY) {
14747
            /* 4. Convert from Montgomery form. */
14748
            err = _sp_mont_red(t[0], m, mp, 0);
14749
            /* Reduction implementation returns number of range 0..m-1. */
14750
        }
14751
    }
14752
    if ((!done) && (err == MP_OKAY)) {
14753
        /* Copy temporary result into parameter. */
14754
        _sp_copy(t[0], r);
14755
    }
14756
14757
    FREE_SP_INT_ARRAY(t, NULL);
14758
    return err;
14759
}
14760
#endif /* WOLFSSL_SP_FAST_NCT_EXPTMOD || !WOLFSSL_SP_SMALL */
14761
14762
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
14763
 * Non-constant time implementation.
14764
 *
14765
 * @param [in]  b  SP integer that is the base.
14766
 * @param [in]  e  SP integer that is the exponent.
14767
 * @param [in]  m  SP integer that is the modulus.
14768
 * @param [out] r  SP integer to hold result.
14769
 *
14770
 * @return  MP_OKAY on success.
14771
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
14772
 * @return  MP_MEM when dynamic memory allocation fails.
14773
 */
14774
int sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m, sp_int* r)
14775
0
{
14776
0
    int err = MP_OKAY;
14777
14778
    /* Validate parameters. */
14779
0
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
14780
0
        err = MP_VAL;
14781
0
    }
14782
14783
#if 0
14784
    if (err == MP_OKAY) {
14785
        sp_print(b, "a");
14786
        sp_print(e, "b");
14787
        sp_print(m, "m");
14788
    }
14789
#endif
14790
14791
0
    if (err != MP_OKAY) {
14792
0
    }
14793
    /* Handle special cases. */
14794
0
    else if (sp_iszero(m)) {
14795
0
        err = MP_VAL;
14796
0
    }
14797
#ifdef WOLFSSL_SP_INT_NEGATIVE
14798
    else if ((e->sign == MP_NEG) || (m->sign == MP_NEG)) {
14799
        err = MP_VAL;
14800
    }
14801
#endif
14802
    /* x mod 1 is always 0. */
14803
0
    else if (sp_isone(m)) {
14804
0
        _sp_set(r, 0);
14805
0
    }
14806
    /* b^0 mod m = 1 mod m = 1. */
14807
0
    else if (sp_iszero(e)) {
14808
0
        _sp_set(r, 1);
14809
0
    }
14810
    /* 0^x mod m = 0 mod m = 0. */
14811
0
    else if (sp_iszero(b)) {
14812
0
        _sp_set(r, 0);
14813
0
    }
14814
    /* Ensure SP integers have space for intermediate values. */
14815
0
    else if (m->used * 2 >= r->size) {
14816
0
        err = MP_VAL;
14817
0
    }
14818
0
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
14819
0
    else if (mp_iseven(m)) {
14820
0
        err = _sp_exptmod_ex(b, e, (int)(e->used * SP_WORD_SIZE), m, r);
14821
0
    }
14822
0
#endif
14823
0
    else {
14824
0
        err = _sp_exptmod_nct(b, e, m, r);
14825
0
    }
14826
14827
#if 0
14828
    if (err == MP_OKAY) {
14829
        sp_print(r, "rme");
14830
    }
14831
#endif
14832
14833
0
    return err;
14834
0
}
14835
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
14836
14837
/***************
14838
 * 2^e functions
14839
 ***************/
14840
14841
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
14842
/* Divide by 2^e: r = a >> e and rem = bits shifted out
14843
 *
14844
 * @param [in]  a    SP integer to divide.
14845
 * @param [in]  e    Exponent bits (dividing by 2^e).
14846
 * @param [out] r    SP integer to hold result.
14847
 * @param [out] rem  SP integer to hold remainder.
14848
 *
14849
 * @return  MP_OKAY on success.
14850
 * @return  MP_VAL when a or r is NULL or e is negative.
14851
 */
14852
int sp_div_2d(const sp_int* a, int e, sp_int* r, sp_int* rem)
14853
0
{
14854
0
    int err = MP_OKAY;
14855
14856
0
    if ((a == NULL) || (r == NULL) || (e < 0)) {
14857
0
        err = MP_VAL;
14858
0
    }
14859
14860
0
    if (err == MP_OKAY) {
14861
        /* Number of bits remaining after shift. */
14862
0
        int remBits = sp_count_bits(a) - e;
14863
14864
0
        if (remBits <= 0) {
14865
            /* Shifting down by more bits than in number. */
14866
0
            _sp_zero(r);
14867
0
            if (rem != NULL) {
14868
0
                err = sp_copy(a, rem);
14869
0
            }
14870
0
        }
14871
0
        else {
14872
0
            if (rem != NULL) {
14873
                /* Copy a into remainder. */
14874
0
                err = sp_copy(a, rem);
14875
0
            }
14876
0
            if (err == MP_OKAY) {
14877
                /* Shift a down by into result. */
14878
0
                err = sp_rshb(a, e, r);
14879
0
            }
14880
0
            if ((err == MP_OKAY) && (rem != NULL)) {
14881
                /* Set used and mask off top digit of remainder. */
14882
0
                rem->used = (sp_size_t)((e + SP_WORD_SIZE - 1) >>
14883
0
                                        SP_WORD_SHIFT);
14884
0
                e &= SP_WORD_MASK;
14885
0
                if (e > 0) {
14886
0
                    rem->dp[rem->used - 1] &= ((sp_int_digit)1 << e) - 1;
14887
0
                }
14888
14889
                /* Remove leading zeros from remainder. */
14890
0
                sp_clamp(rem);
14891
            #ifdef WOLFSSL_SP_INT_NEGATIVE
14892
                rem->sign = MP_ZPOS;
14893
            #endif
14894
0
            }
14895
0
        }
14896
0
    }
14897
14898
0
    return err;
14899
0
}
14900
#endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
14901
14902
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
14903
    defined(HAVE_ECC)
14904
/* The bottom e bits: r = a & ((1 << e) - 1)
14905
 *
14906
 * @param [in]  a  SP integer to reduce.
14907
 * @param [in]  e  Modulus bits (modulus equals 2^e).
14908
 * @param [out] r  SP integer to hold result.
14909
 *
14910
 * @return  MP_OKAY on success.
14911
 * @return  MP_VAL when a or r is NULL, e is negative or e is too large for
14912
 *          result.
14913
 */
14914
int sp_mod_2d(const sp_int* a, int e, sp_int* r)
14915
0
{
14916
0
    int err = MP_OKAY;
14917
0
    sp_size_t digits = (sp_size_t)((e + SP_WORD_SIZE - 1) >> SP_WORD_SHIFT);
14918
14919
0
    if ((a == NULL) || (r == NULL) || (e < 0)) {
14920
0
        err = MP_VAL;
14921
0
    }
14922
0
    if ((err == MP_OKAY) && (digits > r->size)) {
14923
0
        err = MP_VAL;
14924
0
    }
14925
14926
0
    if (err == MP_OKAY) {
14927
        /* Copy a into r if not same pointer. */
14928
0
        if (a != r) {
14929
0
            sp_size_t cnt = (a->used < digits) ? a->used : digits;
14930
0
            XMEMCPY(r->dp, a->dp, cnt * (word32)SP_WORD_SIZEOF);
14931
0
            r->used = a->used;
14932
        #ifdef WOLFSSL_SP_INT_NEGATIVE
14933
            r->sign = a->sign;
14934
        #endif
14935
0
        }
14936
14937
        /* Modify result if a is bigger or same digit size. */
14938
0
    #ifndef WOLFSSL_SP_INT_NEGATIVE
14939
0
        if (digits <= a->used)
14940
    #else
14941
        /* Need to make negative positive and mask. */
14942
        if ((a->sign == MP_NEG) || (digits <= a->used))
14943
    #endif
14944
0
        {
14945
        #ifdef WOLFSSL_SP_INT_NEGATIVE
14946
            if (a->sign == MP_NEG) {
14947
                unsigned int i;
14948
                sp_int_digit carry = 0;
14949
                sp_size_t cnt = (r->used < digits) ? r->used : digits;
14950
14951
                /* Negate value. */
14952
                for (i = 0; i < cnt; i++) {
14953
                    sp_int_digit next = r->dp[i] > 0;
14954
                    r->dp[i] = (sp_int_digit)0 - r->dp[i] - carry;
14955
                    carry |= next;
14956
                }
14957
                for (; i < digits; i++) {
14958
                    r->dp[i] = (sp_int_digit)0 - carry;
14959
                }
14960
                r->sign = MP_ZPOS;
14961
            }
14962
        #endif
14963
            /* Set used and mask off top digit of result. */
14964
0
            r->used = digits;
14965
0
            e &= SP_WORD_MASK;
14966
0
            if (e > 0) {
14967
0
                r->dp[r->used - 1] &= ((sp_int_digit)1 << e) - 1;
14968
0
            }
14969
0
            sp_clamp(r);
14970
0
        }
14971
0
    }
14972
14973
0
    return err;
14974
0
}
14975
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || HAVE_ECC */
14976
14977
#if (defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
14978
    !defined(NO_DH))) || defined(OPENSSL_ALL)
14979
/* Multiply by 2^e: r = a << e
14980
 *
14981
 * @param [in]  a  SP integer to multiply.
14982
 * @param [in]  e  Multiplier bits (multiplier equals 2^e).
14983
 * @param [out] r  SP integer to hold result.
14984
 *
14985
 * @return  MP_OKAY on success.
14986
 * @return  MP_VAL when a or r is NULL, e is negative, or result is too big for
14987
 *          result size.
14988
 */
14989
int sp_mul_2d(const sp_int* a, int e, sp_int* r)
14990
0
{
14991
0
    int err = MP_OKAY;
14992
14993
    /* Validate parameters. */
14994
0
    if ((a == NULL) || (r == NULL) || (e < 0)) {
14995
0
        err = MP_VAL;
14996
0
    }
14997
14998
    /* Ensure r has enough allocated digits for result. */
14999
0
    if ((err == MP_OKAY) &&
15000
0
            ((unsigned int)(sp_count_bits(a) + e) >
15001
0
             (unsigned int)r->size * SP_WORD_SIZE)) {
15002
0
        err = MP_VAL;
15003
0
    }
15004
15005
0
    if (err == MP_OKAY) {
15006
        /* Copy a into r as left shift function works on the number. */
15007
0
        if (a != r) {
15008
0
            err = sp_copy(a, r);
15009
0
        }
15010
0
    }
15011
15012
0
    if (err == MP_OKAY) {
15013
#if 0
15014
        sp_print(a, "a");
15015
        sp_print_int(e, "n");
15016
#endif
15017
0
        err = sp_lshb(r, e);
15018
#if 0
15019
        sp_print(r, "rsl");
15020
#endif
15021
0
    }
15022
15023
0
    return err;
15024
0
}
15025
#endif /* (WOLFSSL_SP_MATH_ALL && (!WOLFSSL_RSA_VERIFY_ONLY || !NO_DH)) ||
        * OPENSSL_ALL */
15026
15027
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
15028
    defined(HAVE_ECC) || (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
15029
15030
/* START SP_SQR implementations */
15031
/* This code is generated.
15032
 * To generate:
15033
 *   cd scripts/sp/sp_int
15034
 *   ./gen.sh
15035
 * File sp_sqr.c contains code.
15036
 */
15037
15038
#if !defined(WOLFSSL_SP_MATH) || !defined(WOLFSSL_SP_SMALL)
15039
#ifdef SQR_MUL_ASM
15040
/* Square a and store in r. r = a * a
15041
 *
15042
 * @param [in]  a  SP integer to square.
15043
 * @param [out] r  SP integer result.
15044
 *
15045
 * @return  MP_OKAY on success.
15046
 * @return  MP_MEM when dynamic memory allocation fails.
15047
 */
15048
static int _sp_sqr(const sp_int* a, sp_int* r)
15049
0
{
15050
0
    int err = MP_OKAY;
15051
0
    sp_size_t i;
15052
0
    int j;
15053
0
    sp_size_t k;
15054
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15055
    sp_int_digit* t = NULL;
15056
#elif defined(WOLFSSL_SP_DYN_STACK)
15057
    sp_int_digit t[((a->used + 1) / 2) * 2 + 1];
15058
#else
15059
    sp_int_digit t[(SP_INT_DIGITS + 1) / 2];
15060
#endif
15061
15062
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15063
    t = (sp_int_digit*)XMALLOC(
15064
        sizeof(sp_int_digit) * (size_t)(((a->used + 1) / 2) * 2 + 1), NULL,
15065
        DYNAMIC_TYPE_BIGINT);
15066
    if (t == NULL) {
15067
        err = MP_MEM;
15068
    }
15069
#endif
15070
0
    if ((err == MP_OKAY) && (a->used <= 1)) {
15071
0
        sp_int_digit l;
15072
0
        sp_int_digit h;
15073
15074
0
        h = 0;
15075
0
        l = 0;
15076
0
        SP_ASM_SQR(h, l, a->dp[0]);
15077
0
        r->dp[0] = h;
15078
0
        r->dp[1] = l;
15079
0
    }
15080
0
    else if (err == MP_OKAY) {
15081
0
        sp_int_digit l;
15082
0
        sp_int_digit h;
15083
0
        sp_int_digit o;
15084
0
        sp_int_digit* p = t;
15085
15086
0
        h = 0;
15087
0
        l = 0;
15088
0
        SP_ASM_SQR(h, l, a->dp[0]);
15089
0
        t[0] = h;
15090
0
        h = 0;
15091
0
        o = 0;
15092
0
        for (k = 1; k < (sp_size_t)((a->used + 1) / 2); k++) {
15093
0
            i = k;
15094
0
            j = (int)(k - 1);
15095
0
            for (; (j >= 0); i++, j--) {
15096
0
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
15097
0
            }
15098
0
            t[k * 2 - 1] = l;
15099
0
            l = h;
15100
0
            h = o;
15101
0
            o = 0;
15102
15103
0
            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
15104
0
            i = (sp_size_t)(k + 1);
15105
0
            j = (int)(k - 1);
15106
0
            for (; (j >= 0); i++, j--) {
15107
0
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
15108
0
            }
15109
0
            t[k * 2] = l;
15110
0
            l = h;
15111
0
            h = o;
15112
0
            o = 0;
15113
0
        }
15114
0
        for (; k < a->used; k++) {
15115
0
            i = k;
15116
0
            j = (int)(k - 1);
15117
0
            for (; (i < a->used); i++, j--) {
15118
0
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
15119
0
            }
15120
0
            p[k * 2 - 1] = l;
15121
0
            l = h;
15122
0
            h = o;
15123
0
            o = 0;
15124
15125
0
            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
15126
0
            i = (sp_size_t)(k + 1);
15127
0
            j = (int)(k - 1);
15128
0
            for (; (i < a->used); i++, j--) {
15129
0
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
15130
0
            }
15131
0
            p[k * 2] = l;
15132
0
            l = h;
15133
0
            h = o;
15134
0
            o = 0;
15135
15136
0
            p = r->dp;
15137
0
        }
15138
0
        r->dp[k * 2 - 1] = l;
15139
0
        XMEMCPY(r->dp, t, (size_t)(((a->used + 1) / 2) * 2 + 1) *
15140
0
            sizeof(sp_int_digit));
15141
0
    }
15142
15143
0
    if (err == MP_OKAY) {
15144
0
        r->used = (sp_size_t)(a->used * 2U);
15145
0
        sp_clamp(r);
15146
0
    }
15147
15148
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15149
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
15150
#endif
15151
0
    return err;
15152
0
}
15153
#else /* !SQR_MUL_ASM */
15154
/* Square a and store in r. r = a * a
15155
 *
15156
 * @param [in]  a  SP integer to square.
15157
 * @param [out] r  SP integer result.
15158
 *
15159
 * @return  MP_OKAY on success.
15160
 * @return  MP_MEM when dynamic memory allocation fails.
15161
 */
15162
static int _sp_sqr(const sp_int* a, sp_int* r)
15163
{
15164
    int err = MP_OKAY;
15165
    sp_size_t i;
15166
    int j;
15167
    sp_size_t k;
15168
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15169
    sp_int_digit* t = NULL;
15170
#elif defined(WOLFSSL_SP_DYN_STACK)
15171
    sp_int_digit t[a->used * 2];
15172
#else
15173
    sp_int_digit t[SP_INT_DIGITS];
15174
#endif
15175
15176
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15177
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (size_t)(a->used * 2),
15178
        NULL, DYNAMIC_TYPE_BIGINT);
15179
    if (t == NULL) {
15180
        err = MP_MEM;
15181
    }
15182
#endif
15183
    if (err == MP_OKAY) {
15184
    #ifndef WOLFSSL_SP_INT_SQR_VOLATILE
15185
        sp_int_word w;
15186
        sp_int_word l;
15187
        sp_int_word h;
15188
    #else
15189
        volatile sp_int_word w;
15190
        volatile sp_int_word l;
15191
        volatile sp_int_word h;
15192
    #endif
15193
    #ifdef SP_WORD_OVERFLOW
15194
        sp_int_word o;
15195
    #endif
15196
15197
        w = (sp_int_word)a->dp[0] * a->dp[0];
15198
        t[0] = (sp_int_digit)w;
15199
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
15200
        h = 0;
15201
    #ifdef SP_WORD_OVERFLOW
15202
        o = 0;
15203
    #endif
15204
        for (k = 1; k <= (sp_size_t)((a->used - 1) * 2); k++) {
15205
            i = k / 2;
15206
            j = (int)(k - i);
15207
            if (i == (unsigned int)j) {
15208
                w = (sp_int_word)a->dp[i] * a->dp[j];
15209
                l += (sp_int_digit)w;
15210
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
15211
            #ifdef SP_WORD_OVERFLOW
15212
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
15213
                l &= SP_MASK;
15214
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
15215
                h &= SP_MASK;
15216
            #endif
15217
            }
15218
            for (++i, --j; (i < a->used) && (j >= 0); i++, j--) {
15219
                w = (sp_int_word)a->dp[i] * a->dp[j];
15220
                l += (sp_int_digit)w;
15221
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
15222
            #ifdef SP_WORD_OVERFLOW
15223
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
15224
                l &= SP_MASK;
15225
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
15226
                h &= SP_MASK;
15227
            #endif
15228
                l += (sp_int_digit)w;
15229
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
15230
            #ifdef SP_WORD_OVERFLOW
15231
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
15232
                l &= SP_MASK;
15233
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
15234
                h &= SP_MASK;
15235
            #endif
15236
            }
15237
            t[k] = (sp_int_digit)l;
15238
            l >>= SP_WORD_SIZE;
15239
            l += (sp_int_digit)h;
15240
            h >>= SP_WORD_SIZE;
15241
        #ifdef SP_WORD_OVERFLOW
15242
            h += o & SP_MASK;
15243
            o >>= SP_WORD_SIZE;
15244
        #endif
15245
        }
15246
        t[k] = (sp_int_digit)l;
15247
        r->used = (sp_size_t)(k + 1);
15248
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
15249
        sp_clamp(r);
15250
    }
15251
15252
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15253
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
15254
#endif
15255
    return err;
15256
}
15257
#endif /* SQR_MUL_ASM */
15258
#endif /* !WOLFSSL_SP_MATH || !WOLFSSL_SP_SMALL */
15259
15260
#ifndef WOLFSSL_SP_SMALL
15261
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
15262
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
15263
#ifndef SQR_MUL_ASM
15264
/* Square a and store in r. r = a * a
15265
 *
15266
 * Long-hand implementation.
15267
 *
15268
 * @param [in]  a  SP integer to square.
15269
 * @param [out] r  SP integer result.
15270
 *
15271
 * @return  MP_OKAY on success.
15272
 * @return  MP_MEM when dynamic memory allocation fails.
15273
 */
15274
static int _sp_sqr_4(const sp_int* a, sp_int* r)
15275
{
15276
    int err = MP_OKAY;
15277
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15278
    sp_int_word* w = NULL;
15279
#else
15280
    sp_int_word w[10];
15281
#endif
15282
    const sp_int_digit* da = a->dp;
15283
15284
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15285
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 10, NULL,
15286
        DYNAMIC_TYPE_BIGINT);
15287
    if (w == NULL) {
15288
        err = MP_MEM;
15289
    }
15290
#endif
15291
15292
15293
    if (err == MP_OKAY) {
15294
        w[0] = (sp_int_word)da[0] * da[0];
15295
        w[1] = (sp_int_word)da[0] * da[1];
15296
        w[2] = (sp_int_word)da[0] * da[2];
15297
        w[3] = (sp_int_word)da[1] * da[1];
15298
        w[4] = (sp_int_word)da[0] * da[3];
15299
        w[5] = (sp_int_word)da[1] * da[2];
15300
        w[6] = (sp_int_word)da[1] * da[3];
15301
        w[7] = (sp_int_word)da[2] * da[2];
15302
        w[8] = (sp_int_word)da[2] * da[3];
15303
        w[9] = (sp_int_word)da[3] * da[3];
15304
15305
        r->dp[0] = (sp_int_digit)w[0];
15306
        w[0] >>= SP_WORD_SIZE;
15307
        w[0] += (sp_int_digit)w[1];
15308
        w[0] += (sp_int_digit)w[1];
15309
        r->dp[1] = (sp_int_digit)w[0];
15310
        w[0] >>= SP_WORD_SIZE;
15311
        w[1] >>= SP_WORD_SIZE;
15312
        w[0] += (sp_int_digit)w[1];
15313
        w[0] += (sp_int_digit)w[1];
15314
        w[0] += (sp_int_digit)w[2];
15315
        w[0] += (sp_int_digit)w[2];
15316
        w[0] += (sp_int_digit)w[3];
15317
        r->dp[2] = (sp_int_digit)w[0];
15318
        w[0] >>= SP_WORD_SIZE;
15319
        w[2] >>= SP_WORD_SIZE;
15320
        w[0] += (sp_int_digit)w[2];
15321
        w[0] += (sp_int_digit)w[2];
15322
        w[3] >>= SP_WORD_SIZE;
15323
        w[0] += (sp_int_digit)w[3];
15324
        w[0] += (sp_int_digit)w[4];
15325
        w[0] += (sp_int_digit)w[4];
15326
        w[0] += (sp_int_digit)w[5];
15327
        w[0] += (sp_int_digit)w[5];
15328
        r->dp[3] = (sp_int_digit)w[0];
15329
        w[0] >>= SP_WORD_SIZE;
15330
        w[4] >>= SP_WORD_SIZE;
15331
        w[0] += (sp_int_digit)w[4];
15332
        w[0] += (sp_int_digit)w[4];
15333
        w[5] >>= SP_WORD_SIZE;
15334
        w[0] += (sp_int_digit)w[5];
15335
        w[0] += (sp_int_digit)w[5];
15336
        w[0] += (sp_int_digit)w[6];
15337
        w[0] += (sp_int_digit)w[6];
15338
        w[0] += (sp_int_digit)w[7];
15339
        r->dp[4] = (sp_int_digit)w[0];
15340
        w[0] >>= SP_WORD_SIZE;
15341
        w[6] >>= SP_WORD_SIZE;
15342
        w[0] += (sp_int_digit)w[6];
15343
        w[0] += (sp_int_digit)w[6];
15344
        w[7] >>= SP_WORD_SIZE;
15345
        w[0] += (sp_int_digit)w[7];
15346
        w[0] += (sp_int_digit)w[8];
15347
        w[0] += (sp_int_digit)w[8];
15348
        r->dp[5] = (sp_int_digit)w[0];
15349
        w[0] >>= SP_WORD_SIZE;
15350
        w[8] >>= SP_WORD_SIZE;
15351
        w[0] += (sp_int_digit)w[8];
15352
        w[0] += (sp_int_digit)w[8];
15353
        w[0] += (sp_int_digit)w[9];
15354
        r->dp[6] = (sp_int_digit)w[0];
15355
        w[0] >>= SP_WORD_SIZE;
15356
        w[9] >>= SP_WORD_SIZE;
15357
        w[0] += (sp_int_digit)w[9];
15358
        r->dp[7] = (sp_int_digit)w[0];
15359
15360
        r->used = 8;
15361
        sp_clamp(r);
15362
    }
15363
15364
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15365
    XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
15366
#endif
15367
    return err;
15368
}
15369
#else /* SQR_MUL_ASM */
15370
/* Square a and store in r. r = a * a
15371
 *
15372
 * Comba implementation, unrolled for an input of 4 digits (a->dp[0..3]).
 * "Column k" in the body marks the statements whose a->dp index pairs sum
 * to k; each column ends by storing l as result digit k and moving h, o down
 * into l, h.
 * NOTE(review): the SP_ASM_* macros are defined outside this function; the
 * comments here rely only on the operands passed to them.
15373
 *
15374
 * @param [in]  a  SP integer to square.
15375
 * @param [out] r  SP integer result. r->used is set to 8 before clamping.
15376
 *
15377
 * @return  MP_OKAY on success.
15378
 * NOTE: nothing is allocated in this variant, so MP_MEM is never returned.
15379
 */
15380
static int _sp_sqr_4(const sp_int* a, sp_int* r)
15381
0
{
15382
0
    sp_int_digit l = 0;
15383
0
    sp_int_digit h = 0;
15384
0
    sp_int_digit o = 0;
15385
0
    /* Result digits 0..3 are held here and copied into r->dp at the end;
     * digits 4..7 are stored into r->dp directly.
     * NOTE(review): presumably this allows r to alias a - confirm. */
    sp_int_digit t[4];
15386
15387
0
    /* Column 0. The value left in h is stored as digit 0; l carries on. */
    SP_ASM_SQR(h, l, a->dp[0]);
15388
0
    t[0] = h;
15389
0
    h = 0;
15390
0
    /* Column 1. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15391
0
    t[1] = l;
15392
0
    l = h;
15393
0
    h = o;
15394
0
    o = 0;
15395
0
    /* Column 2. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15396
0
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15397
0
    t[2] = l;
15398
0
    l = h;
15399
0
    h = o;
15400
0
    o = 0;
15401
0
    /* Column 3. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15402
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15403
0
    t[3] = l;
15404
0
    l = h;
15405
0
    h = o;
15406
0
    o = 0;
15407
0
    /* Column 4 - from here digits are stored straight into r->dp. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15408
0
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15409
0
    r->dp[4] = l;
15410
0
    l = h;
15411
0
    h = o;
15412
0
    o = 0;
15413
0
    /* Column 5. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[3]);
15414
0
    r->dp[5] = l;
15415
0
    l = h;
15416
0
    h = o;
15417
0
    /* Column 6 - last column; l and h become digits 6 and 7. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[3]);
15418
0
    r->dp[6] = l;
15419
0
    r->dp[7] = h;
15420
0
    /* Put the buffered low digits into the result. */
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
15421
0
    r->used = 8;
15422
0
    sp_clamp(r);
15423
15424
0
    return MP_OKAY;
15425
0
}
15426
#endif /* SQR_MUL_ASM */
15427
#endif /* SP_WORD_SIZE == 64 */
15428
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
15429
#ifdef SQR_MUL_ASM
15430
/* Square a and store in r. r = a * a
15431
 *
15432
 * Comba implementation, unrolled for an input of 6 digits (a->dp[0..5]).
 * "Column k" in the body marks the statements whose a->dp index pairs sum
 * to k; each column ends by storing l as result digit k and moving h, o down
 * into l, h.
 * NOTE(review): the SP_ASM_* macros are defined outside this function; the
 * comments here rely only on the operands passed to them.
15433
 *
15434
 * @param [in]  a  SP integer to square.
15435
 * @param [out] r  SP integer result. r->used is set to 12 before clamping.
15436
 *
15437
 * @return  MP_OKAY on success.
15438
 * NOTE: nothing is allocated in this variant, so MP_MEM is never returned.
15439
 */
15440
static int _sp_sqr_6(const sp_int* a, sp_int* r)
15441
0
{
15442
0
    sp_int_digit l = 0;
15443
0
    sp_int_digit h = 0;
15444
0
    sp_int_digit o = 0;
15445
0
    sp_int_digit tl = 0;
15446
0
    sp_int_digit th = 0;
15447
0
    /* Only pre-set for the ARM Thumb 32-bit build below; otherwise its first
     * use is as an argument of SP_ASM_MUL_SET.
     * NOTE(review): presumably that macro assigns it - confirm. */
    sp_int_digit to;
15448
0
    /* Result digits 0..5 are held here and copied into r->dp at the end;
     * digits 6..11 are stored into r->dp directly.
     * NOTE(review): presumably this allows r to alias a - confirm. */
    sp_int_digit t[6];
15449
15450
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
15451
    to = 0;
15452
#endif
15453
15454
0
    /* Column 0. The value left in h is stored as digit 0; l carries on. */
    SP_ASM_SQR(h, l, a->dp[0]);
15455
0
    t[0] = h;
15456
0
    h = 0;
15457
0
    /* Column 1. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15458
0
    t[1] = l;
15459
0
    l = h;
15460
0
    h = o;
15461
0
    o = 0;
15462
0
    /* Column 2. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15463
0
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15464
0
    t[2] = l;
15465
0
    l = h;
15466
0
    h = o;
15467
0
    o = 0;
15468
0
    /* Column 3. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15469
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15470
0
    t[3] = l;
15471
0
    l = h;
15472
0
    h = o;
15473
0
    o = 0;
15474
0
    /* Column 4. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
15475
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15476
0
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15477
0
    t[4] = l;
15478
0
    l = h;
15479
0
    h = o;
15480
0
    o = 0;
15481
0
    /* Column 5 - the three pairs are gathered in tl, th, to and then passed
     * to SP_ASM_ADD_DBL_3 together with l, h, o. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
15482
0
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
15483
0
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
15484
0
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15485
0
    t[5] = l;
15486
0
    l = h;
15487
0
    h = o;
15488
0
    o = 0;
15489
0
    /* Column 6 - from here digits are stored straight into r->dp. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[5]);
15490
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[4]);
15491
0
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
15492
0
    r->dp[6] = l;
15493
0
    l = h;
15494
0
    h = o;
15495
0
    o = 0;
15496
0
    /* Column 7. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[5]);
15497
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[4]);
15498
0
    r->dp[7] = l;
15499
0
    l = h;
15500
0
    h = o;
15501
0
    o = 0;
15502
0
    /* Column 8. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[5]);
15503
0
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
15504
0
    r->dp[8] = l;
15505
0
    l = h;
15506
0
    h = o;
15507
0
    o = 0;
15508
0
    /* Column 9. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[5]);
15509
0
    r->dp[9] = l;
15510
0
    l = h;
15511
0
    h = o;
15512
0
    /* Column 10 - last column; l and h become digits 10 and 11. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[5]);
15513
0
    r->dp[10] = l;
15514
0
    r->dp[11] = h;
15515
0
    /* Put the buffered low digits into the result. */
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
15516
0
    r->used = 12;
15517
0
    sp_clamp(r);
15518
15519
0
    return MP_OKAY;
15520
0
}
15521
#endif /* SQR_MUL_ASM */
15522
#endif /* SP_WORD_SIZE == 64 */
15523
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
15524
#ifdef SQR_MUL_ASM
15525
/* Square a and store in r. r = a * a
15526
 *
15527
 * Comba implementation, unrolled for an input of 8 digits (a->dp[0..7]).
 * "Column k" in the body marks the statements whose a->dp index pairs sum
 * to k; each column ends by storing l as result digit k and moving h, o down
 * into l, h.
 * NOTE(review): the SP_ASM_* macros are defined outside this function; the
 * comments here rely only on the operands passed to them.
15528
 *
15529
 * @param [in]  a  SP integer to square.
15530
 * @param [out] r  SP integer result. r->used is set to 16 before clamping.
15531
 *
15532
 * @return  MP_OKAY on success.
15533
 * NOTE: nothing is allocated in this variant, so MP_MEM is never returned.
15534
 */
15535
static int _sp_sqr_8(const sp_int* a, sp_int* r)
15536
{
15537
    sp_int_digit l = 0;
15538
    sp_int_digit h = 0;
15539
    sp_int_digit o = 0;
15540
    sp_int_digit tl = 0;
15541
    sp_int_digit th = 0;
15542
    /* Only pre-set for the ARM Thumb 32-bit build below; otherwise its first
     * use is as an argument of SP_ASM_MUL_SET.
     * NOTE(review): presumably that macro assigns it - confirm. */
    sp_int_digit to;
15543
    /* Result digits 0..7 are held here and copied into r->dp at the end;
     * digits 8..15 are stored into r->dp directly.
     * NOTE(review): presumably this allows r to alias a - confirm. */
    sp_int_digit t[8];
15544
15545
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
15546
    to = 0;
15547
#endif
15548
15549
    /* Column 0. The value left in h is stored as digit 0; l carries on. */
    SP_ASM_SQR(h, l, a->dp[0]);
15550
    t[0] = h;
15551
    h = 0;
15552
    /* Column 1. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15553
    t[1] = l;
15554
    l = h;
15555
    h = o;
15556
    o = 0;
15557
    /* Column 2. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15558
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15559
    t[2] = l;
15560
    l = h;
15561
    h = o;
15562
    o = 0;
15563
    /* Column 3. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15564
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15565
    t[3] = l;
15566
    l = h;
15567
    h = o;
15568
    o = 0;
15569
    /* Column 4. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
15570
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15571
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15572
    t[4] = l;
15573
    l = h;
15574
    h = o;
15575
    o = 0;
15576
    /* Column 5 - from here, columns with three or more pairs gather them in
     * tl, th, to and pass that to SP_ASM_ADD_DBL_3 with l, h, o. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
15577
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
15578
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
15579
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15580
    t[5] = l;
15581
    l = h;
15582
    h = o;
15583
    o = 0;
15584
    /* Column 6. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
15585
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
15586
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
15587
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
15588
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15589
    t[6] = l;
15590
    l = h;
15591
    h = o;
15592
    o = 0;
15593
    /* Column 7. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
15594
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
15595
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
15596
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
15597
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15598
    t[7] = l;
15599
    l = h;
15600
    h = o;
15601
    o = 0;
15602
    /* Column 8 - from here digits are stored straight into r->dp. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[7]);
15603
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
15604
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
15605
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
15606
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15607
    r->dp[8] = l;
15608
    l = h;
15609
    h = o;
15610
    o = 0;
15611
    /* Column 9. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[7]);
15612
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
15613
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
15614
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15615
    r->dp[9] = l;
15616
    l = h;
15617
    h = o;
15618
    o = 0;
15619
    /* Column 10. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[7]);
15620
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[6]);
15621
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
15622
    r->dp[10] = l;
15623
    l = h;
15624
    h = o;
15625
    o = 0;
15626
    /* Column 11. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[7]);
15627
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[6]);
15628
    r->dp[11] = l;
15629
    l = h;
15630
    h = o;
15631
    o = 0;
15632
    /* Column 12. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[7]);
15633
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
15634
    r->dp[12] = l;
15635
    l = h;
15636
    h = o;
15637
    o = 0;
15638
    /* Column 13. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[6], a->dp[7]);
15639
    r->dp[13] = l;
15640
    l = h;
15641
    h = o;
15642
    /* Column 14 - last column; l and h become digits 14 and 15. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[7]);
15643
    r->dp[14] = l;
15644
    r->dp[15] = h;
15645
    /* Put the buffered low digits into the result. */
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
15646
    r->used = 16;
15647
    sp_clamp(r);
15648
15649
    return MP_OKAY;
15650
}
15651
#endif /* SQR_MUL_ASM */
15652
#endif /* SP_WORD_SIZE == 32 */
15653
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
15654
#ifdef SQR_MUL_ASM
15655
/* Square a and store in r. r = a * a
15656
 *
15657
 * Comba implementation, unrolled for an input of 12 digits (a->dp[0..11]).
 * "Column k" in the body marks the statements whose a->dp index pairs sum
 * to k; each column ends by storing l as result digit k and moving h, o down
 * into l, h.
 * NOTE(review): the SP_ASM_* macros are defined outside this function; the
 * comments here rely only on the operands passed to them.
15658
 *
15659
 * @param [in]  a  SP integer to square.
15660
 * @param [out] r  SP integer result. r->used is set to 24 before clamping.
15661
 *
15662
 * @return  MP_OKAY on success.
15663
 * NOTE: nothing is allocated in this variant, so MP_MEM is never returned.
15664
 */
15665
static int _sp_sqr_12(const sp_int* a, sp_int* r)
15666
{
15667
    sp_int_digit l = 0;
15668
    sp_int_digit h = 0;
15669
    sp_int_digit o = 0;
15670
    sp_int_digit tl = 0;
15671
    sp_int_digit th = 0;
15672
    /* Only pre-set for the ARM Thumb 32-bit build below; otherwise its first
     * use is as an argument of SP_ASM_MUL_SET.
     * NOTE(review): presumably that macro assigns it - confirm. */
    sp_int_digit to;
15673
    /* Result digits 0..11 are held here and copied into r->dp at the end;
     * digits 12..23 are stored into r->dp directly.
     * NOTE(review): presumably this allows r to alias a - confirm. */
    sp_int_digit t[12];
15674
15675
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
15676
    to = 0;
15677
#endif
15678
15679
    /* Column 0. The value left in h is stored as digit 0; l carries on. */
    SP_ASM_SQR(h, l, a->dp[0]);
15680
    t[0] = h;
15681
    h = 0;
15682
    /* Column 1. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15683
    t[1] = l;
15684
    l = h;
15685
    h = o;
15686
    o = 0;
15687
    /* Column 2. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15688
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15689
    t[2] = l;
15690
    l = h;
15691
    h = o;
15692
    o = 0;
15693
    /* Column 3. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15694
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15695
    t[3] = l;
15696
    l = h;
15697
    h = o;
15698
    o = 0;
15699
    /* Column 4. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
15700
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15701
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15702
    t[4] = l;
15703
    l = h;
15704
    h = o;
15705
    o = 0;
15706
    /* Column 5 - from here, columns with three or more pairs gather them in
     * tl, th, to and pass that to SP_ASM_ADD_DBL_3 with l, h, o. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
15707
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
15708
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
15709
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15710
    t[5] = l;
15711
    l = h;
15712
    h = o;
15713
    o = 0;
15714
    /* Column 6. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
15715
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
15716
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
15717
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
15718
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15719
    t[6] = l;
15720
    l = h;
15721
    h = o;
15722
    o = 0;
15723
    /* Column 7. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
15724
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
15725
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
15726
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
15727
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15728
    t[7] = l;
15729
    l = h;
15730
    h = o;
15731
    o = 0;
15732
    /* Column 8. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
15733
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
15734
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
15735
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
15736
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
15737
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15738
    t[8] = l;
15739
    l = h;
15740
    h = o;
15741
    o = 0;
15742
    /* Column 9. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
15743
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
15744
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
15745
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
15746
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
15747
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15748
    t[9] = l;
15749
    l = h;
15750
    h = o;
15751
    o = 0;
15752
    /* Column 10. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
15753
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
15754
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
15755
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
15756
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
15757
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
15758
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15759
    t[10] = l;
15760
    l = h;
15761
    h = o;
15762
    o = 0;
15763
    /* Column 11. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
15764
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
15765
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
15766
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
15767
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
15768
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
15769
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15770
    t[11] = l;
15771
    l = h;
15772
    h = o;
15773
    o = 0;
15774
    /* Column 12 - from here digits are stored straight into r->dp. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[11]);
15775
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
15776
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
15777
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
15778
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
15779
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
15780
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15781
    r->dp[12] = l;
15782
    l = h;
15783
    h = o;
15784
    o = 0;
15785
    /* Column 13. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[11]);
15786
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
15787
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
15788
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
15789
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
15790
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15791
    r->dp[13] = l;
15792
    l = h;
15793
    h = o;
15794
    o = 0;
15795
    /* Column 14. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[11]);
15796
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
15797
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
15798
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
15799
    SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
15800
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15801
    r->dp[14] = l;
15802
    l = h;
15803
    h = o;
15804
    o = 0;
15805
    /* Column 15. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[11]);
15806
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
15807
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
15808
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
15809
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15810
    r->dp[15] = l;
15811
    l = h;
15812
    h = o;
15813
    o = 0;
15814
    /* Column 16. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[11]);
15815
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
15816
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
15817
    SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
15818
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15819
    r->dp[16] = l;
15820
    l = h;
15821
    h = o;
15822
    o = 0;
15823
    /* Column 17. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[11]);
15824
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
15825
    SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
15826
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15827
    r->dp[17] = l;
15828
    l = h;
15829
    h = o;
15830
    o = 0;
15831
    /* Column 18. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[7], a->dp[11]);
15832
    SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[10]);
15833
    SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
15834
    r->dp[18] = l;
15835
    l = h;
15836
    h = o;
15837
    o = 0;
15838
    /* Column 19. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[11]);
15839
    SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[10]);
15840
    r->dp[19] = l;
15841
    l = h;
15842
    h = o;
15843
    o = 0;
15844
    /* Column 20. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[11]);
15845
    SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
15846
    r->dp[20] = l;
15847
    l = h;
15848
    h = o;
15849
    o = 0;
15850
    /* Column 21. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[10], a->dp[11]);
15851
    r->dp[21] = l;
15852
    l = h;
15853
    h = o;
15854
    /* Column 22 - last column; l and h become digits 22 and 23. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[11]);
15855
    r->dp[22] = l;
15856
    r->dp[23] = h;
15857
    /* Put the buffered low digits into the result. */
    XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
15858
    r->used = 24;
15859
    sp_clamp(r);
15860
15861
    return MP_OKAY;
15862
}
15863
#endif /* SQR_MUL_ASM */
15864
#endif /* SP_WORD_SIZE == 32 */
15865
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
15866
15867
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
15868
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
15869
    (SP_WORD_SIZE == 64)))
15870
    #if SP_INT_DIGITS >= 32
15871
/* Square a and store in r. r = a * a
15872
 *
15873
 * Comba implementation, unrolled for an input of 16 digits (a->dp[0..15]).
 * "Column k" in the body marks the statements whose a->dp index pairs sum
 * to k; each column ends by storing l as result digit k and moving h, o down
 * into l, h.
 * NOTE(review): the SP_ASM_* macros are defined outside this function; the
 * comments here rely only on the operands passed to them.
15874
 *
15875
 * @param [in]  a  SP integer to square.
15876
 * @param [out] r  SP integer result. r->used is set to 32 before clamping.
15877
 *
15878
 * @return  MP_OKAY on success.
15879
 * @return  MP_MEM when dynamic memory allocation fails. Only possible when t
 *          is heap allocated (WOLFSSL_SMALL_STACK without
 *          WOLFSSL_SP_NO_MALLOC); r is not modified in that case.
15880
 */
15881
static int _sp_sqr_16(const sp_int* a, sp_int* r)
15882
{
15883
    int err = MP_OKAY;
15884
    sp_int_digit l = 0;
15885
    sp_int_digit h = 0;
15886
    sp_int_digit o = 0;
15887
    sp_int_digit tl = 0;
15888
    sp_int_digit th = 0;
15889
    /* Only pre-set for the ARM Thumb 32-bit build below; otherwise its first
     * use is as an argument of SP_ASM_MUL_SET.
     * NOTE(review): presumably that macro assigns it - confirm. */
    sp_int_digit to;
15890
    /* Result digits 0..15 are held in t and copied into r->dp at the end;
     * digits 16..31 are stored into r->dp directly.
     * NOTE(review): presumably this allows r to alias a - confirm. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15891
    sp_int_digit* t = NULL;
15892
#else
15893
    sp_int_digit t[16];
15894
#endif
15895
15896
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
15897
    to = 0;
15898
#endif
15899
15900
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15901
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
15902
         DYNAMIC_TYPE_BIGINT);
15903
     if (t == NULL) {
15904
         err = MP_MEM;
15905
     }
15906
#endif
15907
    if (err == MP_OKAY) {
15908
        /* Column 0. The value left in h is stored as digit 0; l carries on. */
        SP_ASM_SQR(h, l, a->dp[0]);
15909
        t[0] = h;
15910
        h = 0;
15911
        /* Column 1. */
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15912
        t[1] = l;
15913
        l = h;
15914
        h = o;
15915
        o = 0;
15916
        /* Column 2. */
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15917
        SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15918
        t[2] = l;
15919
        l = h;
15920
        h = o;
15921
        o = 0;
15922
        /* Column 3. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15923
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15924
        t[3] = l;
15925
        l = h;
15926
        h = o;
15927
        o = 0;
15928
        /* Column 4. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
15929
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15930
        SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15931
        t[4] = l;
15932
        l = h;
15933
        h = o;
15934
        o = 0;
15935
        /* Column 5 - from here, columns with three or more pairs gather them
         * in tl, th, to and pass that to SP_ASM_ADD_DBL_3 with l, h, o. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
15936
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
15937
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
15938
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15939
        t[5] = l;
15940
        l = h;
15941
        h = o;
15942
        o = 0;
15943
        /* Column 6. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
15944
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
15945
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
15946
        SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
15947
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15948
        t[6] = l;
15949
        l = h;
15950
        h = o;
15951
        o = 0;
15952
        /* Column 7. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
15953
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
15954
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
15955
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
15956
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15957
        t[7] = l;
15958
        l = h;
15959
        h = o;
15960
        o = 0;
15961
        /* Column 8. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
15962
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
15963
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
15964
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
15965
        SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
15966
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15967
        t[8] = l;
15968
        l = h;
15969
        h = o;
15970
        o = 0;
15971
        /* Column 9. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
15972
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
15973
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
15974
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
15975
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
15976
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15977
        t[9] = l;
15978
        l = h;
15979
        h = o;
15980
        o = 0;
15981
        /* Column 10. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
15982
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
15983
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
15984
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
15985
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
15986
        SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
15987
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15988
        t[10] = l;
15989
        l = h;
15990
        h = o;
15991
        o = 0;
15992
        /* Column 11. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
15993
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
15994
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
15995
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
15996
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
15997
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
15998
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15999
        t[11] = l;
16000
        l = h;
16001
        h = o;
16002
        o = 0;
16003
        /* Column 12. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
16004
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
16005
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
16006
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
16007
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
16008
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
16009
        SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
16010
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16011
        t[12] = l;
16012
        l = h;
16013
        h = o;
16014
        o = 0;
16015
        /* Column 13. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
16016
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
16017
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
16018
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
16019
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
16020
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
16021
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
16022
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16023
        t[13] = l;
16024
        l = h;
16025
        h = o;
16026
        o = 0;
16027
        /* Column 14. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
16028
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
16029
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
16030
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
16031
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
16032
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
16033
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
16034
        SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
16035
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16036
        t[14] = l;
16037
        l = h;
16038
        h = o;
16039
        o = 0;
16040
        /* Column 15. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
16041
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
16042
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
16043
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
16044
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
16045
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
16046
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
16047
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
16048
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16049
        t[15] = l;
16050
        l = h;
16051
        h = o;
16052
        o = 0;
16053
        /* Column 16 - from here digits are stored straight into r->dp. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[15]);
16054
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
16055
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
16056
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
16057
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
16058
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
16059
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
16060
        SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
16061
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16062
        r->dp[16] = l;
16063
        l = h;
16064
        h = o;
16065
        o = 0;
16066
        /* Column 17. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[15]);
16067
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
16068
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
16069
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
16070
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
16071
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
16072
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
16073
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16074
        r->dp[17] = l;
16075
        l = h;
16076
        h = o;
16077
        o = 0;
16078
        /* Column 18. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[15]);
16079
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
16080
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
16081
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
16082
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
16083
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
16084
        SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
16085
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16086
        r->dp[18] = l;
16087
        l = h;
16088
        h = o;
16089
        o = 0;
16090
        /* Column 19. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[15]);
16091
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
16092
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
16093
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
16094
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
16095
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
16096
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16097
        r->dp[19] = l;
16098
        l = h;
16099
        h = o;
16100
        o = 0;
16101
        /* Column 20. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[15]);
16102
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
16103
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
16104
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
16105
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
16106
        SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
16107
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16108
        r->dp[20] = l;
16109
        l = h;
16110
        h = o;
16111
        o = 0;
16112
        /* Column 21. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[15]);
16113
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
16114
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
16115
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
16116
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
16117
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16118
        r->dp[21] = l;
16119
        l = h;
16120
        h = o;
16121
        o = 0;
16122
        /* Column 22. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[15]);
16123
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
16124
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
16125
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
16126
        SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
16127
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16128
        r->dp[22] = l;
16129
        l = h;
16130
        h = o;
16131
        o = 0;
16132
        /* Column 23. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[15]);
16133
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
16134
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
16135
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
16136
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16137
        r->dp[23] = l;
16138
        l = h;
16139
        h = o;
16140
        o = 0;
16141
        /* Column 24. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[15]);
16142
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
16143
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
16144
        SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
16145
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16146
        r->dp[24] = l;
16147
        l = h;
16148
        h = o;
16149
        o = 0;
16150
        /* Column 25. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[15]);
16151
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
16152
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
16153
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16154
        r->dp[25] = l;
16155
        l = h;
16156
        h = o;
16157
        o = 0;
16158
        /* Column 26. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[11], a->dp[15]);
16159
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[14]);
16160
        SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
16161
        r->dp[26] = l;
16162
        l = h;
16163
        h = o;
16164
        o = 0;
16165
        /* Column 27. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[15]);
16166
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[14]);
16167
        r->dp[27] = l;
16168
        l = h;
16169
        h = o;
16170
        o = 0;
16171
        /* Column 28. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[15]);
16172
        SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
16173
        r->dp[28] = l;
16174
        l = h;
16175
        h = o;
16176
        o = 0;
16177
        /* Column 29. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[14], a->dp[15]);
16178
        r->dp[29] = l;
16179
        l = h;
16180
        h = o;
16181
        /* Column 30 - last column; l and h become digits 30 and 31. */
        SP_ASM_SQR_ADD_NO(l, h, a->dp[15]);
16182
        r->dp[30] = l;
16183
        r->dp[31] = h;
16184
        /* Put the buffered low digits into the result. */
        XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
16185
        r->used = 32;
16186
        sp_clamp(r);
16187
    }
16188
16189
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
16190
    /* Reached on the allocation-failure path as well, where t is NULL. */
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
16191
#endif
16192
    return err;
16193
}
16194
    #endif /* SP_INT_DIGITS >= 32 */
16195
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
16196
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64) */
16197
16198
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
16199
    #if SP_INT_DIGITS >= 48
16200
/* Square a and store in r. r = a * a
16201
 *
16202
 * Comba implementation.
16203
 *
16204
 * @param [in]  a  SP integer to square.
16205
 * @param [out] r  SP integer result.
16206
 *
16207
 * @return  MP_OKAY on success.
16208
 * @return  MP_MEM when dynamic memory allocation fails.
16209
 */
16210
static int _sp_sqr_24(const sp_int* a, sp_int* r)
16211
{
16212
    int err = MP_OKAY;
16213
    sp_int_digit l = 0;
16214
    sp_int_digit h = 0;
16215
    sp_int_digit o = 0;
16216
    sp_int_digit tl = 0;
16217
    sp_int_digit th = 0;
16218
    sp_int_digit to;
16219
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
16220
    sp_int_digit* t = NULL;
16221
#else
16222
    sp_int_digit t[24];
16223
#endif
16224
16225
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
16226
    to = 0;
16227
#endif
16228
16229
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
16230
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
16231
         DYNAMIC_TYPE_BIGINT);
16232
     if (t == NULL) {
16233
         err = MP_MEM;
16234
     }
16235
#endif
16236
    if (err == MP_OKAY) {
16237
        SP_ASM_SQR(h, l, a->dp[0]);
16238
        t[0] = h;
16239
        h = 0;
16240
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
16241
        t[1] = l;
16242
        l = h;
16243
        h = o;
16244
        o = 0;
16245
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
16246
        SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
16247
        t[2] = l;
16248
        l = h;
16249
        h = o;
16250
        o = 0;
16251
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
16252
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
16253
        t[3] = l;
16254
        l = h;
16255
        h = o;
16256
        o = 0;
16257
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
16258
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
16259
        SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
16260
        t[4] = l;
16261
        l = h;
16262
        h = o;
16263
        o = 0;
16264
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
16265
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
16266
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
16267
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16268
        t[5] = l;
16269
        l = h;
16270
        h = o;
16271
        o = 0;
16272
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
16273
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
16274
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
16275
        SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
16276
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16277
        t[6] = l;
16278
        l = h;
16279
        h = o;
16280
        o = 0;
16281
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
16282
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
16283
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
16284
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
16285
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16286
        t[7] = l;
16287
        l = h;
16288
        h = o;
16289
        o = 0;
16290
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
16291
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
16292
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
16293
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
16294
        SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
16295
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16296
        t[8] = l;
16297
        l = h;
16298
        h = o;
16299
        o = 0;
16300
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
16301
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
16302
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
16303
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
16304
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
16305
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16306
        t[9] = l;
16307
        l = h;
16308
        h = o;
16309
        o = 0;
16310
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
16311
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
16312
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
16313
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
16314
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
16315
        SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
16316
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16317
        t[10] = l;
16318
        l = h;
16319
        h = o;
16320
        o = 0;
16321
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
16322
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
16323
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
16324
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
16325
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
16326
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
16327
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16328
        t[11] = l;
16329
        l = h;
16330
        h = o;
16331
        o = 0;
16332
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
16333
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
16334
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
16335
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
16336
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
16337
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
16338
        SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
16339
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16340
        t[12] = l;
16341
        l = h;
16342
        h = o;
16343
        o = 0;
16344
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
16345
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
16346
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
16347
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
16348
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
16349
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
16350
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
16351
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16352
        t[13] = l;
16353
        l = h;
16354
        h = o;
16355
        o = 0;
16356
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
16357
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
16358
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
16359
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
16360
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
16361
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
16362
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
16363
        SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
16364
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16365
        t[14] = l;
16366
        l = h;
16367
        h = o;
16368
        o = 0;
16369
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
16370
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
16371
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
16372
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
16373
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
16374
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
16375
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
16376
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
16377
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16378
        t[15] = l;
16379
        l = h;
16380
        h = o;
16381
        o = 0;
16382
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[16]);
16383
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[15]);
16384
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
16385
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
16386
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
16387
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
16388
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
16389
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
16390
        SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
16391
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16392
        t[16] = l;
16393
        l = h;
16394
        h = o;
16395
        o = 0;
16396
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[17]);
16397
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[16]);
16398
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[15]);
16399
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
16400
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
16401
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
16402
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
16403
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
16404
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
16405
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16406
        t[17] = l;
16407
        l = h;
16408
        h = o;
16409
        o = 0;
16410
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[18]);
16411
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[17]);
16412
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[16]);
16413
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[15]);
16414
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
16415
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
16416
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
16417
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
16418
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
16419
        SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
16420
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16421
        t[18] = l;
16422
        l = h;
16423
        h = o;
16424
        o = 0;
16425
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[19]);
16426
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[18]);
16427
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[17]);
16428
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[16]);
16429
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[15]);
16430
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
16431
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
16432
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
16433
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
16434
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
16435
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16436
        t[19] = l;
16437
        l = h;
16438
        h = o;
16439
        o = 0;
16440
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[20]);
16441
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[19]);
16442
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[18]);
16443
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[17]);
16444
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[16]);
16445
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[15]);
16446
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
16447
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
16448
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
16449
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
16450
        SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
16451
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16452
        t[20] = l;
16453
        l = h;
16454
        h = o;
16455
        o = 0;
16456
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[21]);
16457
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[20]);
16458
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[19]);
16459
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[18]);
16460
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[17]);
16461
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[16]);
16462
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[15]);
16463
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
16464
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
16465
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
16466
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
16467
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16468
        t[21] = l;
16469
        l = h;
16470
        h = o;
16471
        o = 0;
16472
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[22]);
16473
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[21]);
16474
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[20]);
16475
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[19]);
16476
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[18]);
16477
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[17]);
16478
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[16]);
16479
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[15]);
16480
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
16481
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
16482
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
16483
        SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
16484
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16485
        t[22] = l;
16486
        l = h;
16487
        h = o;
16488
        o = 0;
16489
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[23]);
16490
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[22]);
16491
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[21]);
16492
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[20]);
16493
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[19]);
16494
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[18]);
16495
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[17]);
16496
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[16]);
16497
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[15]);
16498
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
16499
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
16500
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
16501
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16502
        t[23] = l;
16503
        l = h;
16504
        h = o;
16505
        o = 0;
16506
        SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[23]);
16507
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[22]);
16508
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[21]);
16509
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[20]);
16510
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[19]);
16511
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[18]);
16512
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[17]);
16513
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[16]);
16514
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[15]);
16515
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
16516
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
16517
        SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
16518
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16519
        r->dp[24] = l;
16520
        l = h;
16521
        h = o;
16522
        o = 0;
16523
        SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[23]);
16524
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[22]);
16525
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[21]);
16526
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[20]);
16527
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[19]);
16528
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[18]);
16529
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[17]);
16530
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[16]);
16531
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[15]);
16532
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
16533
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
16534
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16535
        r->dp[25] = l;
16536
        l = h;
16537
        h = o;
16538
        o = 0;
16539
        SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[23]);
16540
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[22]);
16541
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[21]);
16542
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[20]);
16543
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[19]);
16544
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[18]);
16545
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[17]);
16546
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[16]);
16547
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[15]);
16548
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[14]);
16549
        SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
16550
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16551
        r->dp[26] = l;
16552
        l = h;
16553
        h = o;
16554
        o = 0;
16555
        SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[23]);
16556
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[22]);
16557
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[21]);
16558
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[20]);
16559
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[19]);
16560
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[18]);
16561
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[17]);
16562
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[16]);
16563
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[15]);
16564
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[14]);
16565
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16566
        r->dp[27] = l;
16567
        l = h;
16568
        h = o;
16569
        o = 0;
16570
        SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[23]);
16571
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[22]);
16572
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[21]);
16573
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[20]);
16574
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[19]);
16575
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[18]);
16576
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[17]);
16577
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[16]);
16578
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[15]);
16579
        SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
16580
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16581
        r->dp[28] = l;
16582
        l = h;
16583
        h = o;
16584
        o = 0;
16585
        SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[23]);
16586
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[22]);
16587
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[21]);
16588
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[20]);
16589
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[19]);
16590
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[18]);
16591
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[17]);
16592
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[16]);
16593
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[15]);
16594
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16595
        r->dp[29] = l;
16596
        l = h;
16597
        h = o;
16598
        o = 0;
16599
        SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[23]);
16600
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[22]);
16601
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[21]);
16602
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[20]);
16603
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[19]);
16604
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[18]);
16605
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[17]);
16606
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[16]);
16607
        SP_ASM_SQR_ADD(l, h, o, a->dp[15]);
16608
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16609
        r->dp[30] = l;
16610
        l = h;
16611
        h = o;
16612
        o = 0;
16613
        SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[23]);
16614
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[22]);
16615
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[21]);
16616
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[20]);
16617
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[19]);
16618
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[18]);
16619
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[17]);
16620
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[16]);
16621
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16622
        r->dp[31] = l;
16623
        l = h;
16624
        h = o;
16625
        o = 0;
16626
        SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[23]);
16627
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[22]);
16628
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[21]);
16629
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[20]);
16630
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[19]);
16631
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[18]);
16632
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[17]);
16633
        SP_ASM_SQR_ADD(l, h, o, a->dp[16]);
16634
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16635
        r->dp[32] = l;
16636
        l = h;
16637
        h = o;
16638
        o = 0;
16639
        SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[23]);
16640
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[22]);
16641
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[21]);
16642
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[20]);
16643
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[19]);
16644
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[18]);
16645
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[17]);
16646
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16647
        r->dp[33] = l;
16648
        l = h;
16649
        h = o;
16650
        o = 0;
16651
        SP_ASM_MUL_SET(tl, th, to, a->dp[11], a->dp[23]);
16652
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[22]);
16653
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[21]);
16654
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[20]);
16655
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[19]);
16656
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[18]);
16657
        SP_ASM_SQR_ADD(l, h, o, a->dp[17]);
16658
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16659
        r->dp[34] = l;
16660
        l = h;
16661
        h = o;
16662
        o = 0;
16663
        SP_ASM_MUL_SET(tl, th, to, a->dp[12], a->dp[23]);
16664
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[22]);
16665
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[21]);
16666
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[20]);
16667
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[19]);
16668
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[18]);
16669
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16670
        r->dp[35] = l;
16671
        l = h;
16672
        h = o;
16673
        o = 0;
16674
        SP_ASM_MUL_SET(tl, th, to, a->dp[13], a->dp[23]);
16675
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[22]);
16676
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[21]);
16677
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[20]);
16678
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[19]);
16679
        SP_ASM_SQR_ADD(l, h, o, a->dp[18]);
16680
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16681
        r->dp[36] = l;
16682
        l = h;
16683
        h = o;
16684
        o = 0;
16685
        SP_ASM_MUL_SET(tl, th, to, a->dp[14], a->dp[23]);
16686
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[22]);
16687
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[21]);
16688
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[20]);
16689
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[19]);
16690
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16691
        r->dp[37] = l;
16692
        l = h;
16693
        h = o;
16694
        o = 0;
16695
        SP_ASM_MUL_SET(tl, th, to, a->dp[15], a->dp[23]);
16696
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[22]);
16697
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[21]);
16698
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[20]);
16699
        SP_ASM_SQR_ADD(l, h, o, a->dp[19]);
16700
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16701
        r->dp[38] = l;
16702
        l = h;
16703
        h = o;
16704
        o = 0;
16705
        SP_ASM_MUL_SET(tl, th, to, a->dp[16], a->dp[23]);
16706
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[22]);
16707
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[21]);
16708
        SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[20]);
16709
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16710
        r->dp[39] = l;
16711
        l = h;
16712
        h = o;
16713
        o = 0;
16714
        SP_ASM_MUL_SET(tl, th, to, a->dp[17], a->dp[23]);
16715
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[22]);
16716
        SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[21]);
16717
        SP_ASM_SQR_ADD(l, h, o, a->dp[20]);
16718
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16719
        r->dp[40] = l;
16720
        l = h;
16721
        h = o;
16722
        o = 0;
16723
        SP_ASM_MUL_SET(tl, th, to, a->dp[18], a->dp[23]);
16724
        SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[22]);
16725
        SP_ASM_MUL_ADD(tl, th, to, a->dp[20], a->dp[21]);
16726
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16727
        r->dp[41] = l;
16728
        l = h;
16729
        h = o;
16730
        o = 0;
16731
        SP_ASM_MUL_ADD2(l, h, o, a->dp[19], a->dp[23]);
16732
        SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[22]);
16733
        SP_ASM_SQR_ADD(l, h, o, a->dp[21]);
16734
        r->dp[42] = l;
16735
        l = h;
16736
        h = o;
16737
        o = 0;
16738
        SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[23]);
16739
        SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[22]);
16740
        r->dp[43] = l;
16741
        l = h;
16742
        h = o;
16743
        o = 0;
16744
        SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[23]);
16745
        SP_ASM_SQR_ADD(l, h, o, a->dp[22]);
16746
        r->dp[44] = l;
16747
        l = h;
16748
        h = o;
16749
        o = 0;
16750
        SP_ASM_MUL_ADD2(l, h, o, a->dp[22], a->dp[23]);
16751
        r->dp[45] = l;
16752
        l = h;
16753
        h = o;
16754
        SP_ASM_SQR_ADD_NO(l, h, a->dp[23]);
16755
        r->dp[46] = l;
16756
        r->dp[47] = h;
16757
        XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
16758
        r->used = 48;
16759
        sp_clamp(r);
16760
    }
16761
16762
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
16763
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
16764
#endif
16765
    return err;
16766
}
16767
    #endif /* SP_INT_DIGITS >= 48 */
16768
16769
    #if SP_INT_DIGITS >= 64
16770
/* Square a and store in r. r = a * a
16771
 *
16772
 * Karatsuba implementation.
16773
 *
16774
 * @param [in]  a  SP integer to square.
16775
 * @param [out] r  SP integer result.
16776
 *
16777
 * @return  MP_OKAY on success.
16778
 * @return  MP_MEM when dynamic memory allocation fails.
16779
 */
16780
static int _sp_sqr_32(const sp_int* a, sp_int* r)
16781
{
16782
    int err = MP_OKAY;
16783
    unsigned int i;
16784
    sp_int_digit l;
16785
    sp_int_digit h;
16786
    sp_int* z0;
16787
    sp_int* z1;
16788
    sp_int* z2;
16789
    sp_int_digit ca;
16790
    DECL_SP_INT(a1, 16);
16791
    DECL_SP_INT_ARRAY(z, 33, 2);
16792
16793
    ALLOC_SP_INT(a1, 16, err, NULL);
16794
    ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
16795
    if (err == MP_OKAY) {
16796
        z1 = z[0];
16797
        z2 = z[1];
16798
        z0 = r;
16799
16800
        XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
16801
        a1->used = 16;
16802
16803
        /* z2 = a1 ^ 2 */
16804
        err = _sp_sqr_16(a1, z2);
16805
    }
16806
    if (err == MP_OKAY) {
16807
        l = 0;
16808
        h = 0;
16809
        for (i = 0; i < 16; i++) {
16810
            SP_ASM_ADDC(l, h, a1->dp[i]);
16811
            SP_ASM_ADDC(l, h, a->dp[i]);
16812
            a1->dp[i] = l;
16813
            l = h;
16814
            h = 0;
16815
        }
16816
        ca = l;
16817
16818
        /* z0 = a0 ^ 2 */
16819
        err = _sp_sqr_16(a, z0);
16820
    }
16821
    if (err == MP_OKAY) {
16822
        /* z1 = (a0 + a1) ^ 2 */
16823
        err = _sp_sqr_16(a1, z1);
16824
    }
16825
    if (err == MP_OKAY) {
16826
        /* r = (z2 << 32) + (z1 - z0 - z2) << 16) + z0 */
16827
        /* r = z0 */
16828
        /* r += (z1 - z0 - z2) << 16 */
16829
        z1->dp[32] = ca;
16830
        l = 0;
16831
        if (ca) {
16832
            l = z1->dp[0 + 16];
16833
            h = 0;
16834
            SP_ASM_ADDC(l, h, a1->dp[0]);
16835
            SP_ASM_ADDC(l, h, a1->dp[0]);
16836
            z1->dp[0 + 16] = l;
16837
            l = h;
16838
            h = 0;
16839
            for (i = 1; i < 16; i++) {
16840
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
16841
                SP_ASM_ADDC(l, h, a1->dp[i]);
16842
                SP_ASM_ADDC(l, h, a1->dp[i]);
16843
                z1->dp[i + 16] = l;
16844
                l = h;
16845
                h = 0;
16846
            }
16847
        }
16848
        z1->dp[32] += l;
16849
        /* z1 = z1 - z0 - z2 */
16850
        l = z1->dp[0];
16851
        h = 0;
16852
        SP_ASM_SUBB(l, h, z0->dp[0]);
16853
        SP_ASM_SUBB(l, h, z2->dp[0]);
16854
        z1->dp[0] = l;
16855
        l = h;
16856
        h = 0;
16857
        for (i = 1; i < 32; i++) {
16858
            l += z1->dp[i];
16859
            SP_ASM_SUBB(l, h, z0->dp[i]);
16860
            SP_ASM_SUBB(l, h, z2->dp[i]);
16861
            z1->dp[i] = l;
16862
            l = h;
16863
            h = 0;
16864
        }
16865
        z1->dp[i] += l;
16866
        /* r += z1 << 16 */
16867
        l = 0;
16868
        h = 0;
16869
        for (i = 0; i < 16; i++) {
16870
            SP_ASM_ADDC(l, h, r->dp[i + 16]);
16871
            SP_ASM_ADDC(l, h, z1->dp[i]);
16872
            r->dp[i + 16] = l;
16873
            l = h;
16874
            h = 0;
16875
        }
16876
        for (; i < 33; i++) {
16877
            SP_ASM_ADDC(l, h, z1->dp[i]);
16878
            r->dp[i + 16] = l;
16879
            l = h;
16880
            h = 0;
16881
        }
16882
        /* r += z2 << 32  */
16883
        l = 0;
16884
        h = 0;
16885
        for (i = 0; i < 17; i++) {
16886
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
16887
            SP_ASM_ADDC(l, h, z2->dp[i]);
16888
            r->dp[i + 32] = l;
16889
            l = h;
16890
            h = 0;
16891
        }
16892
        for (; i < 32; i++) {
16893
            SP_ASM_ADDC(l, h, z2->dp[i]);
16894
            r->dp[i + 32] = l;
16895
            l = h;
16896
            h = 0;
16897
        }
16898
        r->used = 64;
16899
        sp_clamp(r);
16900
    }
16901
16902
    FREE_SP_INT_ARRAY(z, NULL);
16903
    FREE_SP_INT(a1, NULL);
16904
    return err;
16905
}
16906
    #endif /* SP_INT_DIGITS >= 64 */
16907
16908
    #if SP_INT_DIGITS >= 96
16909
/* Square a and store in r. r = a * a
16910
 *
16911
 * Karatsuba implementation using three 24 digit squarings: z0, z1 and z2.
16912
 *
16913
 * @param [in]  a  SP integer to square. Low 48 digits of a->dp are used.
16914
 * @param [out] r  SP integer result. Used is set to 96 before clamping.
16915
 *
16916
 * @return  MP_OKAY on success.
16917
 * @return  MP_MEM when dynamic memory allocation fails.
16918
 */
16919
static int _sp_sqr_48(const sp_int* a, sp_int* r)
16920
{
16921
    int err = MP_OKAY;
16922
    unsigned int i;
16923
    sp_int_digit l;
16924
    sp_int_digit h;
16925
    sp_int* z0;
16926
    sp_int* z1;
16927
    sp_int* z2;
16928
    sp_int_digit ca;
16929
    DECL_SP_INT(a1, 24);
16930
    DECL_SP_INT_ARRAY(z, 49, 2);
16931
16932
    ALLOC_SP_INT(a1, 24, err, NULL);
16933
    ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
16934
    if (err == MP_OKAY) {
16935
        z1 = z[0];
16936
        z2 = z[1];
16937
        z0 = r; /* z0 is calculated directly into r */
16938
16939
        XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
16940
        a1->used = 24; /* a1 = high 24 digits of a */
16941
16942
        /* z2 = a1 ^ 2 */
16943
        err = _sp_sqr_24(a1, z2);
16944
    }
16945
    if (err == MP_OKAY) {
16946
        l = 0; /* a1 += low 24 digits of a (a0); carry out kept in ca */
16947
        h = 0;
16948
        for (i = 0; i < 24; i++) {
16949
            SP_ASM_ADDC(l, h, a1->dp[i]);
16950
            SP_ASM_ADDC(l, h, a->dp[i]);
16951
            a1->dp[i] = l;
16952
            l = h;
16953
            h = 0;
16954
        }
16955
        ca = l;
16956
16957
        /* z0 = a0 ^ 2 */
16958
        err = _sp_sqr_24(a, z0);
16959
    }
16960
    if (err == MP_OKAY) {
16961
        /* z1 = (a0 + a1) ^ 2 */
16962
        err = _sp_sqr_24(a1, z1);
16963
    }
16964
    if (err == MP_OKAY) {
16965
        /* r = (z2 << 48) + ((z1 - z0 - z2) << 24) + z0 - shifts in digits */
16966
        /* r = z0 - nothing to do as z0 is r */
16967
        /* r += (z1 - z0 - z2) << 24 */
16968
        z1->dp[48] = ca; /* ca * ca term - ca is a carry: 0 or 1 */
16969
        l = 0;
16970
        if (ca) { /* z1 += (2 * a1) << 24, where a1 is the wrapped sum */
16971
            l = z1->dp[0 + 24];
16972
            h = 0;
16973
            SP_ASM_ADDC(l, h, a1->dp[0]);
16974
            SP_ASM_ADDC(l, h, a1->dp[0]);
16975
            z1->dp[0 + 24] = l;
16976
            l = h;
16977
            h = 0;
16978
            for (i = 1; i < 24; i++) {
16979
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
16980
                SP_ASM_ADDC(l, h, a1->dp[i]);
16981
                SP_ASM_ADDC(l, h, a1->dp[i]);
16982
                z1->dp[i + 24] = l;
16983
                l = h;
16984
                h = 0;
16985
            }
16986
        }
16987
        z1->dp[48] += l;
16988
        /* z1 = z1 - z0 - z2 */
16989
        l = z1->dp[0];
16990
        h = 0;
16991
        SP_ASM_SUBB(l, h, z0->dp[0]);
16992
        SP_ASM_SUBB(l, h, z2->dp[0]);
16993
        z1->dp[0] = l;
16994
        l = h;
16995
        h = 0;
16996
        for (i = 1; i < 48; i++) {
16997
            l += z1->dp[i]; /* l is carry-over from previous digit */
16998
            SP_ASM_SUBB(l, h, z0->dp[i]);
16999
            SP_ASM_SUBB(l, h, z2->dp[i]);
17000
            z1->dp[i] = l;
17001
            l = h;
17002
            h = 0;
17003
        }
17004
        z1->dp[i] += l; /* i is 48 - top digit takes last carry-over */
17005
        /* r += z1 << 24 */
17006
        l = 0;
17007
        h = 0;
17008
        for (i = 0; i < 24; i++) {
17009
            SP_ASM_ADDC(l, h, r->dp[i + 24]);
17010
            SP_ASM_ADDC(l, h, z1->dp[i]);
17011
            r->dp[i + 24] = l;
17012
            l = h;
17013
            h = 0;
17014
        }
17015
        for (; i < 49; i++) { /* r->dp[48..72]: set, r not read */
17016
            SP_ASM_ADDC(l, h, z1->dp[i]);
17017
            r->dp[i + 24] = l;
17018
            l = h;
17019
            h = 0;
17020
        }
17021
        /* r += z2 << 48  */
17022
        l = 0;
17023
        h = 0;
17024
        for (i = 0; i < 25; i++) {
17025
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
17026
            SP_ASM_ADDC(l, h, z2->dp[i]);
17027
            r->dp[i + 48] = l;
17028
            l = h;
17029
            h = 0;
17030
        }
17031
        for (; i < 48; i++) { /* r->dp[73..95]: set, r not read */
17032
            SP_ASM_ADDC(l, h, z2->dp[i]);
17033
            r->dp[i + 48] = l;
17034
            l = h;
17035
            h = 0;
17036
        }
17037
        r->used = 96;
17038
        sp_clamp(r);
17039
    }
17040
17041
    FREE_SP_INT_ARRAY(z, NULL);
17042
    FREE_SP_INT(a1, NULL);
17043
    return err;
17044
}
17045
    #endif /* SP_INT_DIGITS >= 96 */
17046
17047
    #if SP_INT_DIGITS >= 128
17048
/* Square a and store in r. r = a * a
17049
 *
17050
 * Karatsuba implementation using three 32 digit squarings: z0, z1 and z2.
17051
 *
17052
 * @param [in]  a  SP integer to square. Low 64 digits of a->dp are used.
17053
 * @param [out] r  SP integer result. Used is set to 128 before clamping.
17054
 *
17055
 * @return  MP_OKAY on success.
17056
 * @return  MP_MEM when dynamic memory allocation fails.
17057
 */
17058
static int _sp_sqr_64(const sp_int* a, sp_int* r)
17059
{
17060
    int err = MP_OKAY;
17061
    unsigned int i;
17062
    sp_int_digit l;
17063
    sp_int_digit h;
17064
    sp_int* z0;
17065
    sp_int* z1;
17066
    sp_int* z2;
17067
    sp_int_digit ca;
17068
    DECL_SP_INT(a1, 32);
17069
    DECL_SP_INT_ARRAY(z, 65, 2);
17070
17071
    ALLOC_SP_INT(a1, 32, err, NULL);
17072
    ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
17073
    if (err == MP_OKAY) {
17074
        z1 = z[0];
17075
        z2 = z[1];
17076
        z0 = r; /* z0 is calculated directly into r */
17077
17078
        XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
17079
        a1->used = 32; /* a1 = high 32 digits of a */
17080
17081
        /* z2 = a1 ^ 2 */
17082
        err = _sp_sqr_32(a1, z2);
17083
    }
17084
    if (err == MP_OKAY) {
17085
        l = 0; /* a1 += low 32 digits of a (a0); carry out kept in ca */
17086
        h = 0;
17087
        for (i = 0; i < 32; i++) {
17088
            SP_ASM_ADDC(l, h, a1->dp[i]);
17089
            SP_ASM_ADDC(l, h, a->dp[i]);
17090
            a1->dp[i] = l;
17091
            l = h;
17092
            h = 0;
17093
        }
17094
        ca = l;
17095
17096
        /* z0 = a0 ^ 2 */
17097
        err = _sp_sqr_32(a, z0);
17098
    }
17099
    if (err == MP_OKAY) {
17100
        /* z1 = (a0 + a1) ^ 2 */
17101
        err = _sp_sqr_32(a1, z1);
17102
    }
17103
    if (err == MP_OKAY) {
17104
        /* r = (z2 << 64) + ((z1 - z0 - z2) << 32) + z0 - shifts in digits */
17105
        /* r = z0 - nothing to do as z0 is r */
17106
        /* r += (z1 - z0 - z2) << 32 */
17107
        z1->dp[64] = ca; /* ca * ca term - ca is a carry: 0 or 1 */
17108
        l = 0;
17109
        if (ca) { /* z1 += (2 * a1) << 32, where a1 is the wrapped sum */
17110
            l = z1->dp[0 + 32];
17111
            h = 0;
17112
            SP_ASM_ADDC(l, h, a1->dp[0]);
17113
            SP_ASM_ADDC(l, h, a1->dp[0]);
17114
            z1->dp[0 + 32] = l;
17115
            l = h;
17116
            h = 0;
17117
            for (i = 1; i < 32; i++) {
17118
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
17119
                SP_ASM_ADDC(l, h, a1->dp[i]);
17120
                SP_ASM_ADDC(l, h, a1->dp[i]);
17121
                z1->dp[i + 32] = l;
17122
                l = h;
17123
                h = 0;
17124
            }
17125
        }
17126
        z1->dp[64] += l;
17127
        /* z1 = z1 - z0 - z2 */
17128
        l = z1->dp[0];
17129
        h = 0;
17130
        SP_ASM_SUBB(l, h, z0->dp[0]);
17131
        SP_ASM_SUBB(l, h, z2->dp[0]);
17132
        z1->dp[0] = l;
17133
        l = h;
17134
        h = 0;
17135
        for (i = 1; i < 64; i++) {
17136
            l += z1->dp[i]; /* l is carry-over from previous digit */
17137
            SP_ASM_SUBB(l, h, z0->dp[i]);
17138
            SP_ASM_SUBB(l, h, z2->dp[i]);
17139
            z1->dp[i] = l;
17140
            l = h;
17141
            h = 0;
17142
        }
17143
        z1->dp[i] += l; /* i is 64 - top digit takes last carry-over */
17144
        /* r += z1 << 32 */
17145
        l = 0;
17146
        h = 0;
17147
        for (i = 0; i < 32; i++) {
17148
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
17149
            SP_ASM_ADDC(l, h, z1->dp[i]);
17150
            r->dp[i + 32] = l;
17151
            l = h;
17152
            h = 0;
17153
        }
17154
        for (; i < 65; i++) { /* r->dp[64..96]: set, r not read */
17155
            SP_ASM_ADDC(l, h, z1->dp[i]);
17156
            r->dp[i + 32] = l;
17157
            l = h;
17158
            h = 0;
17159
        }
17160
        /* r += z2 << 64  */
17161
        l = 0;
17162
        h = 0;
17163
        for (i = 0; i < 33; i++) {
17164
            SP_ASM_ADDC(l, h, r->dp[i + 64]);
17165
            SP_ASM_ADDC(l, h, z2->dp[i]);
17166
            r->dp[i + 64] = l;
17167
            l = h;
17168
            h = 0;
17169
        }
17170
        for (; i < 64; i++) { /* r->dp[97..127]: set, r not read */
17171
            SP_ASM_ADDC(l, h, z2->dp[i]);
17172
            r->dp[i + 64] = l;
17173
            l = h;
17174
            h = 0;
17175
        }
17176
        r->used = 128;
17177
        sp_clamp(r);
17178
    }
17179
17180
    FREE_SP_INT_ARRAY(z, NULL);
17181
    FREE_SP_INT(a1, NULL);
17182
    return err;
17183
}
17184
    #endif /* SP_INT_DIGITS >= 128 */
17185
17186
    #if SP_INT_DIGITS >= 192
17187
/* Square a and store in r. r = a * a
17188
 *
17189
 * Karatsuba implementation using three 48 digit squarings: z0, z1 and z2.
17190
 *
17191
 * @param [in]  a  SP integer to square. Low 96 digits of a->dp are used.
17192
 * @param [out] r  SP integer result. Used is set to 192 before clamping.
17193
 *
17194
 * @return  MP_OKAY on success.
17195
 * @return  MP_MEM when dynamic memory allocation fails.
17196
 */
17197
static int _sp_sqr_96(const sp_int* a, sp_int* r)
17198
{
17199
    int err = MP_OKAY;
17200
    unsigned int i;
17201
    sp_int_digit l;
17202
    sp_int_digit h;
17203
    sp_int* z0;
17204
    sp_int* z1;
17205
    sp_int* z2;
17206
    sp_int_digit ca;
17207
    DECL_SP_INT(a1, 48);
17208
    DECL_SP_INT_ARRAY(z, 97, 2);
17209
17210
    ALLOC_SP_INT(a1, 48, err, NULL);
17211
    ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
17212
    if (err == MP_OKAY) {
17213
        z1 = z[0];
17214
        z2 = z[1];
17215
        z0 = r; /* z0 is calculated directly into r */
17216
17217
        XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
17218
        a1->used = 48; /* a1 = high 48 digits of a */
17219
17220
        /* z2 = a1 ^ 2 */
17221
        err = _sp_sqr_48(a1, z2);
17222
    }
17223
    if (err == MP_OKAY) {
17224
        l = 0; /* a1 += low 48 digits of a (a0); carry out kept in ca */
17225
        h = 0;
17226
        for (i = 0; i < 48; i++) {
17227
            SP_ASM_ADDC(l, h, a1->dp[i]);
17228
            SP_ASM_ADDC(l, h, a->dp[i]);
17229
            a1->dp[i] = l;
17230
            l = h;
17231
            h = 0;
17232
        }
17233
        ca = l;
17234
17235
        /* z0 = a0 ^ 2 */
17236
        err = _sp_sqr_48(a, z0);
17237
    }
17238
    if (err == MP_OKAY) {
17239
        /* z1 = (a0 + a1) ^ 2 */
17240
        err = _sp_sqr_48(a1, z1);
17241
    }
17242
    if (err == MP_OKAY) {
17243
        /* r = (z2 << 96) + ((z1 - z0 - z2) << 48) + z0 - shifts in digits */
17244
        /* r = z0 - nothing to do as z0 is r */
17245
        /* r += (z1 - z0 - z2) << 48 */
17246
        z1->dp[96] = ca; /* ca * ca term - ca is a carry: 0 or 1 */
17247
        l = 0;
17248
        if (ca) { /* z1 += (2 * a1) << 48, where a1 is the wrapped sum */
17249
            l = z1->dp[0 + 48];
17250
            h = 0;
17251
            SP_ASM_ADDC(l, h, a1->dp[0]);
17252
            SP_ASM_ADDC(l, h, a1->dp[0]);
17253
            z1->dp[0 + 48] = l;
17254
            l = h;
17255
            h = 0;
17256
            for (i = 1; i < 48; i++) {
17257
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
17258
                SP_ASM_ADDC(l, h, a1->dp[i]);
17259
                SP_ASM_ADDC(l, h, a1->dp[i]);
17260
                z1->dp[i + 48] = l;
17261
                l = h;
17262
                h = 0;
17263
            }
17264
        }
17265
        z1->dp[96] += l;
17266
        /* z1 = z1 - z0 - z2 */
17267
        l = z1->dp[0];
17268
        h = 0;
17269
        SP_ASM_SUBB(l, h, z0->dp[0]);
17270
        SP_ASM_SUBB(l, h, z2->dp[0]);
17271
        z1->dp[0] = l;
17272
        l = h;
17273
        h = 0;
17274
        for (i = 1; i < 96; i++) {
17275
            l += z1->dp[i]; /* l is carry-over from previous digit */
17276
            SP_ASM_SUBB(l, h, z0->dp[i]);
17277
            SP_ASM_SUBB(l, h, z2->dp[i]);
17278
            z1->dp[i] = l;
17279
            l = h;
17280
            h = 0;
17281
        }
17282
        z1->dp[i] += l; /* i is 96 - top digit takes last carry-over */
17283
        /* r += z1 << 48 */
17284
        l = 0;
17285
        h = 0;
17286
        for (i = 0; i < 48; i++) {
17287
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
17288
            SP_ASM_ADDC(l, h, z1->dp[i]);
17289
            r->dp[i + 48] = l;
17290
            l = h;
17291
            h = 0;
17292
        }
17293
        for (; i < 97; i++) { /* r->dp[96..144]: set, r not read */
17294
            SP_ASM_ADDC(l, h, z1->dp[i]);
17295
            r->dp[i + 48] = l;
17296
            l = h;
17297
            h = 0;
17298
        }
17299
        /* r += z2 << 96  */
17300
        l = 0;
17301
        h = 0;
17302
        for (i = 0; i < 49; i++) {
17303
            SP_ASM_ADDC(l, h, r->dp[i + 96]);
17304
            SP_ASM_ADDC(l, h, z2->dp[i]);
17305
            r->dp[i + 96] = l;
17306
            l = h;
17307
            h = 0;
17308
        }
17309
        for (; i < 96; i++) { /* r->dp[145..191]: set, r not read */
17310
            SP_ASM_ADDC(l, h, z2->dp[i]);
17311
            r->dp[i + 96] = l;
17312
            l = h;
17313
            h = 0;
17314
        }
17315
        r->used = 192;
17316
        sp_clamp(r);
17317
    }
17318
17319
    FREE_SP_INT_ARRAY(z, NULL);
17320
    FREE_SP_INT(a1, NULL);
17321
    return err;
17322
}
17323
    #endif /* SP_INT_DIGITS >= 192 */
17324
17325
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
17326
#endif /* !WOLFSSL_SP_SMALL */
17327
17328
/* Square a and store in r. r = a * a
17329
 *
17330
 * @param [in]  a  SP integer to square.
17331
 * @param [out] r  SP integer result. Size must be at least 2 * a->used.
17332
 *
17333
 * @return  MP_OKAY on success.
17334
 * @return  MP_VAL when a or r is NULL, or the result will be too big for fixed
17335
 *          data length (2 * a->used is greater than r->size).
17336
 * @return  MP_MEM when dynamic memory allocation fails.
17337
 */
17338
int sp_sqr(const sp_int* a, sp_int* r)
17339
0
{
17340
#if defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_SP_SMALL)
17341
    return sp_mul(a, a, r); /* square done as a multiply in this build */
17342
#else
17343
0
    int err = MP_OKAY;
17344
17345
0
    if ((a == NULL) || (r == NULL)) {
17346
0
        err = MP_VAL;
17347
0
    }
17348
    /* Result may take up to 2 * a->used digits - r must be that big. */
17349
0
    if ((err == MP_OKAY) && (a->used * 2 > r->size)) {
17350
0
        err = MP_VAL;
17351
0
    }
17352
17353
#if 0
17354
    if (err == MP_OKAY) {
17355
        sp_print(a, "a");
17356
    }
17357
#endif
17358
17359
0
    if (err == MP_OKAY) {
17360
0
        if (a->used == 0) {
17361
0
            _sp_zero(r);
17362
0
        }
17363
0
    else
17364
0
#ifndef WOLFSSL_SP_SMALL
17365
0
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
17366
0
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
17367
0
        if (a->used == 4) {
17368
0
            err = _sp_sqr_4(a, r);
17369
0
        }
17370
0
        else
17371
0
#endif /* SP_WORD_SIZE == 64 */
17372
0
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
17373
0
#ifdef SQR_MUL_ASM
17374
0
        if (a->used == 6) {
17375
0
            err = _sp_sqr_6(a, r);
17376
0
        }
17377
0
        else
17378
0
#endif /* SQR_MUL_ASM */
17379
0
#endif /* SP_WORD_SIZE == 64 */
17380
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
17381
#ifdef SQR_MUL_ASM
17382
        if (a->used == 8) {
17383
            err = _sp_sqr_8(a, r);
17384
        }
17385
        else
17386
#endif /* SQR_MUL_ASM */
17387
#endif /* SP_WORD_SIZE == 32 */
17388
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
17389
#ifdef SQR_MUL_ASM
17390
        if (a->used == 12) {
17391
            err = _sp_sqr_12(a, r);
17392
        }
17393
        else
17394
#endif /* SQR_MUL_ASM */
17395
#endif /* SP_WORD_SIZE == 32 */
17396
0
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
17397
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
17398
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
17399
    (SP_WORD_SIZE == 64)))
17400
    #if SP_INT_DIGITS >= 32
17401
        if (a->used == 16) {
17402
            err = _sp_sqr_16(a, r);
17403
        }
17404
        else
17405
    #endif /* SP_INT_DIGITS >= 32 */
17406
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
17407
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
17408
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
17409
    #if SP_INT_DIGITS >= 48
17410
        if (a->used == 24) {
17411
            err = _sp_sqr_24(a, r);
17412
        }
17413
        else
17414
    #endif /* SP_INT_DIGITS >= 48 */
17415
    #if SP_INT_DIGITS >= 64
17416
        if (a->used == 32) {
17417
            err = _sp_sqr_32(a, r);
17418
        }
17419
        else
17420
    #endif /* SP_INT_DIGITS >= 64 */
17421
    #if SP_INT_DIGITS >= 96
17422
        if (a->used == 48) {
17423
            err = _sp_sqr_48(a, r);
17424
        }
17425
        else
17426
    #endif /* SP_INT_DIGITS >= 96 */
17427
    #if SP_INT_DIGITS >= 128
17428
        if (a->used == 64) {
17429
            err = _sp_sqr_64(a, r);
17430
        }
17431
        else
17432
    #endif /* SP_INT_DIGITS >= 128 */
17433
    #if SP_INT_DIGITS >= 192
17434
        if (a->used == 96) {
17435
            err = _sp_sqr_96(a, r);
17436
        }
17437
        else
17438
    #endif /* SP_INT_DIGITS >= 192 */
17439
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
17440
0
#endif /* !WOLFSSL_SP_SMALL */
17441
0
        { /* No size-specific implementation matched - use _sp_sqr. */
17442
0
            err = _sp_sqr(a, r);
17443
0
        }
17444
0
    }
17445
17446
#ifdef WOLFSSL_SP_INT_NEGATIVE
17447
    if (err == MP_OKAY) {
17448
        r->sign = MP_ZPOS; /* a square is never negative */
17449
    }
17450
#endif
17451
17452
#if 0
17453
    if (err == MP_OKAY) {
17454
        sp_print(r, "rsqr");
17455
    }
17456
#endif
17457
17458
0
    return err;
17459
0
#endif /* WOLFSSL_SP_MATH && WOLFSSL_SP_SMALL */
17460
0
}
17461
/* END SP_SQR implementations */
17462
17463
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
17464
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
17465
17466
#if defined(WOLFSSL_SP_MATH_ALL) || \
17467
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
17468
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || defined(HAVE_ECC)
17469
/* Square a mod m and store in r: r = (a * a) mod m
17470
 *
17471
 * @param [in]  a  SP integer to square.
17472
 * @param [in]  m  SP integer that is the modulus.
17473
 * @param [out] r  SP integer result. Never holds the unreduced square.
17474
 *
17475
 * @return  MP_OKAY on success.
17476
 * @return  MP_MEM when dynamic memory allocation fails.
17477
 */
17478
static int _sp_sqrmod(const sp_int* a, const sp_int* m, sp_int* r)
17479
0
{
17480
0
    int err = MP_OKAY;
17481
17482
0
    if (sp_iszero(a)) {
17483
0
        _sp_zero(r);
17484
0
    }
17485
0
    else {
17486
        /* Create temporary for the square: up to 2 * a->used digits. */
17487
0
        DECL_SP_INT(t, a->used * 2);
17488
17489
0
        ALLOC_SP_INT(t, a->used * 2, err, NULL);
17490
0
        if (err == MP_OKAY) {
17491
0
            err = sp_init_size(t, a->used * 2U);
17492
0
        }
17493
17494
        /* Square into temporary and reduce into r. */
17495
0
        if (err == MP_OKAY) {
17496
0
            err = sp_sqr(a, t);
17497
0
        }
17498
0
        if (err == MP_OKAY) {
17499
0
            err = sp_mod(t, m, r);
17500
0
        }
17501
17502
        /* Dispose of an allocated SP int. */
17503
0
        FREE_SP_INT(t, NULL);
17504
0
    }
17505
17506
0
    return err;
17507
0
}
17508
17509
/* Square a mod m and store in r: r = (a * a) mod m
17510
 *
17511
 * @param [in]  a  SP integer to square.
17512
 * @param [in]  m  SP integer that is the modulus.
17513
 * @param [out] r  SP integer result. May be the same object as m.
17514
 *
17515
 * @return  MP_OKAY on success.
17516
 * @return  MP_VAL when a, m or r is NULL; or m is 0; or a squared is too big
17517
 *          for fixed data length.
17518
 * @return  MP_MEM when dynamic memory allocation fails.
17519
 */
17520
int sp_sqrmod(const sp_int* a, const sp_int* m, sp_int* r)
17521
0
{
17522
0
    int err = MP_OKAY;
17523
17524
    /* Validate parameters. */
17525
0
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
17526
0
        err = MP_VAL;
17527
0
    }
17528
    /* r is not m: r holds the square - ensure it has space for it. */
17529
0
    if ((err == MP_OKAY) && (r != m) && (a->used * 2 > r->size)) {
17530
0
        err = MP_VAL;
17531
0
    }
17532
    /* r is m: a temporary holds the square - ensure a is not too big. */
17533
0
    if ((err == MP_OKAY) && (r == m) && (a->used * 2 > SP_INT_DIGITS)) {
17534
0
        err = MP_VAL;
17535
0
    }
17536
17537
    /* Use r as intermediate result if not same as pointer m which is needed
17538
     * after first intermediate result.
17539
     */
17540
0
    if ((err == MP_OKAY) && (r != m)) {
17541
        /* Square and reduce. */
17542
0
        err = sp_sqr(a, r);
17543
0
        if (err == MP_OKAY) {
17544
0
            err = sp_mod(r, m, r);
17545
0
        }
17546
0
    }
17547
0
    else if (err == MP_OKAY) {
17548
        /* Do operation with temporary. */
17549
0
        err = _sp_sqrmod(a, m, r);
17550
0
    }
17551
17552
0
    return err;
17553
0
}
17554
#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
17555
17556
/**********************
17557
 * Montgomery functions
17558
 **********************/
17559
17560
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
17561
    defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
17562
    defined(OPENSSL_ALL)
17563
/* Reduce a number in Montgomery form.
17564
 *
17565
 * Assumes a and m are not NULL and m is not 0.
17566
 *
17567
 * DigitMask(a,i) := mask out the 'i'th digit in place.
17568
 *
17569
 * Algorithm:
17570
 *  1. mask = (1 << (NumBits(m) % WORD_SIZE)) - 1
17571
 *  2. For i = 0..NumDigits(m)-1
17572
 *   2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK
17573
 *   2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask
17574
 *   2.3. a += mu * DigitMask(m, 0)
17575
 *   2.4. For j = 1 up to NumDigits(m)-2
17576
 *    2.4.1 a += mu * DigitMask(m, j)
17577
 *   2.5 a += mu * DigitMask(m, NumDigits(m)-1)
17578
 * 3. a >>= NumBits(m)
17579
 * 4. a = a % m
17580
 *
17581
 * @param [in, out] a   SP integer to Montgomery reduce.
17582
 * @param [in]      m   SP integer that is the modulus.
17583
 * @param [in]      mp  SP integer digit that is the bottom digit of inv(-m).
17584
 * @param [in]      ct  Indicates operation must be constant time.
17585
 *
17586
 * @return  MP_OKAY on success.
17587
 */
17588
static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct)
17589
0
{
17590
#if !defined(SQR_MUL_ASM)
17591
    unsigned int i;
17592
    int bits;
17593
    sp_int_word w;
17594
    sp_int_digit mu;
17595
17596
#if 0
17597
    sp_print(a, "a");
17598
    sp_print(m, "m");
17599
#endif
17600
17601
    /* Count bits in modulus. */
17602
    bits = sp_count_bits(m);
17603
17604
    /* Adding numbers into m->used * 2 digits - zero out unused digits. */
17605
#ifndef WOLFSSL_NO_CT_OPS
17606
    if (ct) {
17607
        for (i = 0; i < (unsigned int)m->used * 2; i++) {
17608
            a->dp[i] &= (sp_int_digit)
17609
                (sp_int_sdigit)ctMaskIntGTE((int)a->used - 1, (int)i);
17610
        }
17611
    }
17612
    else
17613
#endif /* !WOLFSSL_NO_CT_OPS */
17614
    {
17615
        for (i = a->used; i < (unsigned int)m->used * 2; i++) {
17616
            a->dp[i] = 0;
17617
        }
17618
    }
17619
17620
    /* Special case when modulus is 1 digit or less. */
17621
    if (m->used <= 1) {
17622
        /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
17623
        mu = mp * a->dp[0];
17624
        /* a += mu * m */
17625
        w = a->dp[0];
17626
        w += (sp_int_word)mu * m->dp[0];
17627
        a->dp[0] = (sp_int_digit)w;
17628
        w >>= SP_WORD_SIZE;
17629
        w += a->dp[1];
17630
        a->dp[1] = (sp_int_digit)w;
17631
        w >>= SP_WORD_SIZE;
17632
        a->dp[2] = (sp_int_digit)w;
17633
        a->used = 3;
17634
        /* bits is SP_WORD_SIZE */
17635
        bits = SP_WORD_SIZE;
17636
    }
17637
    else {
17638
        /* 1. mask = (1 << (NumBits(m) % WORD_SIZE)) - 1
17639
         *    Mask when last digit of modulus doesn't have highest bit set.
17640
         */
17641
        volatile sp_int_digit mask = (sp_int_digit)
17642
            (((sp_int_digit)1 << (bits & (SP_WORD_SIZE - 1))) - 1);
17643
        /* Overflow. */
17644
        sp_int_word o = 0;
17645
17646
        /* 2. For i = 0..NumDigits(m)-1 */
17647
        for (i = 0; i < m->used; i++) {
17648
            unsigned int j;
17649
17650
            /* 2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK */
17651
            mu = mp * a->dp[i];
17652
            /* 2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask */
17653
            if ((i == (unsigned int)m->used - 1) && (mask != 0)) {
17654
                mu &= mask;
17655
            }
17656
17657
            /* 2.3. a += mu * DigitMask(m, 0) */
17658
            w = a->dp[i];
17659
            w += (sp_int_word)mu * m->dp[0];
17660
            a->dp[i] = (sp_int_digit)w;
17661
            w >>= SP_WORD_SIZE;
17662
            /* 2.4. For j = 1 up to NumDigits(m)-2 */
17663
            for (j = 1; j < (unsigned int)m->used - 1; j++) {
17664
                /* 2.4.1 a += mu * DigitMask(m, j) */
17665
                w += a->dp[i + j];
17666
                w += (sp_int_word)mu * m->dp[j];
17667
                a->dp[i + j] = (sp_int_digit)w;
17668
                w >>= SP_WORD_SIZE;
17669
            }
17670
            /* Handle overflow. */
17671
            w += o;
17672
            w += a->dp[i + j];
17673
            o = (sp_int_digit)(w >> SP_WORD_SIZE);
17674
            /* 2.5 a += mu * DigitMask(m, NumDigits(m)-1) */
17675
            w = ((sp_int_word)mu * m->dp[j]) + (sp_int_digit)w;
17676
            a->dp[i + j] = (sp_int_digit)w;
17677
            w >>= SP_WORD_SIZE;
17678
            o += w;
17679
        }
17680
        /* Handle overflow. */
17681
        o += a->dp[m->used * 2 - 1];
17682
        a->dp[m->used * 2 - 1] = (sp_int_digit)o;
17683
        o >>= SP_WORD_SIZE;
17684
        a->dp[m->used * 2] = (sp_int_digit)o;
17685
        a->used = (sp_size_t)(m->used * 2 + 1);
17686
    }
17687
17688
    if (!ct) {
17689
        /* Remove leading zeros. */
17690
        sp_clamp(a);
17691
        /* 3. a >>= NumBits(m) */
17692
        (void)sp_rshb(a, bits, a);
17693
        /* 4. a = a mod m */
17694
        if (_sp_cmp_abs(a, m) != MP_LT) {
17695
            _sp_sub_off(a, m, a, 0);
17696
        }
17697
    }
17698
    else {
17699
        /* 3. a >>= NumBits(m) */
17700
        (void)sp_rshb(a, bits, a);
17701
        /* Constant time clamping. */
17702
        sp_clamp_ct(a);
17703
17704
        /* 4. a = a mod m
17705
         * Always subtract but at a too high offset if a is less than m.
17706
         */
17707
        _sp_submod_ct(a, m, m, m->used + 1U, a);
17708
    }
17709
17710
17711
#if 0
17712
    sp_print(a, "rr");
17713
#endif
17714
17715
    return MP_OKAY;
17716
#else /* !SQR_MUL_ASM */
17717
0
    unsigned int i;
17718
0
    unsigned int j;
17719
0
    int bits;
17720
0
    sp_int_digit mu;
17721
0
    sp_int_digit o;
17722
0
    volatile sp_int_digit mask;
17723
17724
#if 0
17725
    sp_print(a, "a");
17726
    sp_print(m, "m");
17727
#endif
17728
17729
0
    bits = sp_count_bits(m);
17730
0
    mask = ((sp_int_digit)1 << (bits & (SP_WORD_SIZE - 1))) - 1;
17731
17732
0
#ifndef WOLFSSL_NO_CT_OPS
17733
0
    if (ct) {
17734
0
        for (i = 0; i < (unsigned int)m->used * 2; i++) {
17735
0
            a->dp[i] &= (sp_int_digit)
17736
0
                (sp_int_sdigit)ctMaskIntGTE((int)a->used - 1, (int)i);
17737
0
        }
17738
0
    }
17739
0
    else
17740
0
#endif
17741
0
    {
17742
0
        for (i = a->used; i < (unsigned int)m->used * 2; i++) {
17743
0
            a->dp[i] = 0;
17744
0
        }
17745
0
    }
17746
17747
0
    if (m->used <= 1) {
17748
0
        sp_int_digit l;
17749
0
        sp_int_digit h;
17750
17751
        /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
17752
0
        mu = mp * a->dp[0];
17753
        /* a += mu * m */
17754
0
        l = a->dp[0];
17755
0
        h = 0;
17756
0
        SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
17757
0
        a->dp[0] = l;
17758
0
        l = h;
17759
0
        h = 0;
17760
0
        SP_ASM_ADDC(l, h, a->dp[1]);
17761
0
        a->dp[1] = l;
17762
0
        a->dp[2] = h;
17763
0
        a->used = (sp_size_t)(m->used * 2 + 1);
17764
        /* bits is SP_WORD_SIZE */
17765
0
        bits = SP_WORD_SIZE;
17766
0
    }
17767
0
#if !defined(WOLFSSL_SP_MATH) && defined(HAVE_ECC)
17768
0
#if SP_WORD_SIZE == 64
17769
0
#if SP_INT_DIGITS >= 8
17770
0
    else if ((m->used == 4) && (mask == 0)) {
17771
0
        sp_int_digit l;
17772
0
        sp_int_digit h;
17773
0
        sp_int_digit o2;
17774
17775
0
        l = 0;
17776
0
        h = 0;
17777
0
        o = 0;
17778
0
        o2 = 0;
17779
        /* For i = 0..NumDigits(m)-1 */
17780
0
        for (i = 0; i < 4; i++) {
17781
            /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
17782
0
            mu = mp * a->dp[0];
17783
0
            l = a->dp[0];
17784
            /* a = (a + mu * m) >> WORD_SIZE */
17785
0
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
17786
0
            l = h;
17787
0
            h = 0;
17788
0
            SP_ASM_ADDC(l, h, a->dp[1]);
17789
0
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
17790
0
            a->dp[0] = l;
17791
0
            l = h;
17792
0
            h = 0;
17793
0
            SP_ASM_ADDC(l, h, a->dp[2]);
17794
0
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
17795
0
            a->dp[1] = l;
17796
0
            l = h;
17797
0
            h = o2;
17798
0
            o2 = 0;
17799
0
            SP_ASM_ADDC_REG(l, h, o);
17800
0
            SP_ASM_ADDC(l, h, a->dp[i + 3]);
17801
0
            SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[3]);
17802
0
            a->dp[2] = l;
17803
0
            o = h;
17804
0
            l = h;
17805
0
            h = 0;
17806
0
        }
17807
        /* Handle overflow. */
17808
0
        SP_ASM_ADDC(l, o2, a->dp[7]);
17809
0
        a->dp[3] = l;
17810
0
        a->dp[4] = o2;
17811
0
        a->used = 5;
17812
17813
        /* Remove leading zeros. */
17814
0
        sp_clamp(a);
17815
17816
        /* a = a mod m */
17817
0
        if (_sp_cmp_abs(a, m) != MP_LT) {
17818
0
            _sp_sub_off(a, m, a, 0);
17819
0
        }
17820
17821
0
        return MP_OKAY;
17822
0
    }
17823
0
#endif /* SP_INT_DIGITS >= 8 */
17824
0
#if SP_INT_DIGITS >= 12
17825
0
    else if ((m->used == 6) && (mask == 0)) {
17826
0
        sp_int_digit l;
17827
0
        sp_int_digit h;
17828
0
        sp_int_digit o2;
17829
17830
0
        l = 0;
17831
0
        h = 0;
17832
0
        o = 0;
17833
0
        o2 = 0;
17834
        /* For i = 0..NumDigits(m)-1 */
17835
0
        for (i = 0; i < 6; i++) {
17836
            /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
17837
0
            mu = mp * a->dp[0];
17838
0
            l = a->dp[0];
17839
            /* a = (a + mu * m) >> WORD_SIZE */
17840
0
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
17841
0
            l = h;
17842
0
            h = 0;
17843
0
            SP_ASM_ADDC(l, h, a->dp[1]);
17844
0
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
17845
0
            a->dp[0] = l;
17846
0
            l = h;
17847
0
            h = 0;
17848
0
            SP_ASM_ADDC(l, h, a->dp[2]);
17849
0
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
17850
0
            a->dp[1] = l;
17851
0
            l = h;
17852
0
            h = 0;
17853
0
            SP_ASM_ADDC(l, h, a->dp[3]);
17854
0
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[3]);
17855
0
            a->dp[2] = l;
17856
0
            l = h;
17857
0
            h = 0;
17858
0
            SP_ASM_ADDC(l, h, a->dp[4]);
17859
0
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[4]);
17860
0
            a->dp[3] = l;
17861
0
            l = h;
17862
0
            h = o2;
17863
0
            o2 = 0;
17864
0
            SP_ASM_ADDC_REG(l, h, o);
17865
0
            SP_ASM_ADDC(l, h, a->dp[i + 5]);
17866
0
            SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[5]);
17867
0
            a->dp[4] = l;
17868
0
            o = h;
17869
0
            l = h;
17870
0
            h = 0;
17871
0
        }
17872
        /* Handle overflow. */
17873
0
        SP_ASM_ADDC(l, o2, a->dp[11]);
17874
0
        a->dp[5] = l;
17875
0
        a->dp[6] = o2;
17876
0
        a->used = 7;
17877
17878
        /* Remove leading zeros. */
17879
0
        sp_clamp(a);
17880
17881
        /* a = a mod m */
17882
0
        if (_sp_cmp_abs(a, m) != MP_LT) {
17883
0
            _sp_sub_off(a, m, a, 0);
17884
0
        }
17885
17886
0
        return MP_OKAY;
17887
0
    }
17888
0
#endif /* SP_INT_DIGITS >= 12 */
17889
#elif SP_WORD_SIZE == 32
17890
    else if ((m->used <= 12) && (mask == 0)) {
17891
        sp_int_digit l;
17892
        sp_int_digit h;
17893
        sp_int_digit o2;
17894
        sp_int_digit* ad;
17895
        const sp_int_digit* md;
17896
17897
        o = 0;
17898
        o2 = 0;
17899
        ad = a->dp;
17900
        /* For i = 0..NumDigits(m)-1 */
17901
        for (i = 0; i < m->used; i++) {
17902
            md = m->dp;
17903
            /*  mu = (mp * DigitMask(a, i)) & WORD_MASK */
17904
            mu = mp * ad[0];
17905
17906
            /* a = (a + mu * m, 0) >> WORD_SIZE */
17907
            l = ad[0];
17908
            h = 0;
17909
            SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17910
            l = h;
17911
            for (j = 1; j < (unsigned int)m->used - 2; j += 2) {
17912
                h = 0;
17913
                SP_ASM_ADDC(l, h, ad[j]);
17914
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17915
                ad[j - 1] = l;
17916
                l = 0;
17917
                SP_ASM_ADDC(h, l, ad[j + 1]);
17918
                SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
17919
                ad[j] = h;
17920
            }
17921
            for (; j < (unsigned int)m->used - 1; j++) {
17922
                h = 0;
17923
                SP_ASM_ADDC(l, h, ad[j]);
17924
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17925
                ad[j - 1] = l;
17926
                l = h;
17927
            }
17928
            h = o2;
17929
            o2 = 0;
17930
            SP_ASM_ADDC_REG(l, h, o);
17931
            SP_ASM_ADDC(l, h, ad[i + j]);
17932
            SP_ASM_MUL_ADD(l, h, o2, mu, *md);
17933
            ad[j - 1] = l;
17934
            o = h;
17935
        }
17936
        /* Handle overflow. */
17937
        SP_ASM_ADDC(o, o2, a->dp[m->used * 2 - 1]);
17938
        a->dp[m->used  - 1] = o;
17939
        a->dp[m->used] = o2;
17940
        a->used = m->used + 1;
17941
17942
        /* Remove leading zeros. */
17943
        sp_clamp(a);
17944
17945
        /* a = a mod m */
17946
        if (_sp_cmp_abs(a, m) != MP_LT) {
17947
            _sp_sub_off(a, m, a, 0);
17948
        }
17949
17950
        return MP_OKAY;
17951
    }
17952
#endif /* SP_WORD_SIZE == 64 | 32 */
17953
0
#endif /* !WOLFSSL_SP_MATH && HAVE_ECC */
17954
0
    else {
17955
0
        sp_int_digit l;
17956
0
        sp_int_digit h;
17957
0
        sp_int_digit o2;
17958
0
        sp_int_digit* ad;
17959
0
        const sp_int_digit* md;
17960
17961
0
        o = 0;
17962
0
        o2 = 0;
17963
0
        ad = a->dp;
17964
        /* 2. For i = 0..NumDigits(m)-1 */
17965
0
        for (i = 0; i < m->used; i++, ad++) {
17966
0
            md = m->dp;
17967
            /* 2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK */
17968
0
            mu = mp * ad[0];
17969
            /* 2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask */
17970
0
            if ((i == (unsigned int)m->used - 1) && (mask != 0)) {
17971
0
                mu &= mask;
17972
0
            }
17973
17974
            /* 2.3 a += mu * DigitMask(m, 0) */
17975
0
            l = ad[0];
17976
0
            h = 0;
17977
0
            SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17978
0
            ad[0] = l;
17979
0
            l = h;
17980
            /* 2.4. For j = 1 up to NumDigits(m)-2 */
17981
0
            for (j = 1; j < (unsigned int)m->used - 2; j += 2) {
17982
0
                h = 0;
17983
                /* 2.4.1. a += mu * DigitMask(m, j) */
17984
0
                SP_ASM_ADDC(l, h, ad[j + 0]);
17985
0
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17986
0
                ad[j + 0] = l;
17987
0
                l = 0;
17988
                /* 2.4.1. a += mu * DigitMask(m, j) */
17989
0
                SP_ASM_ADDC(h, l, ad[j + 1]);
17990
0
                SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
17991
0
                ad[j + 1] = h;
17992
0
            }
17993
0
            for (; j < (unsigned int)m->used - 1; j++) {
17994
0
                h = 0;
17995
                /* 2.4.1. a += mu * DigitMask(m, j) */
17996
0
                SP_ASM_ADDC(l, h, ad[j]);
17997
0
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17998
0
                ad[j] = l;
17999
0
                l = h;
18000
0
            }
18001
0
            h = o2;
18002
0
            o2 = 0;
18003
0
            SP_ASM_ADDC_REG(l, h, o);
18004
            /* 2.5 a += mu * DigitMask(m, NumDigits(m)-1) */
18005
0
            SP_ASM_ADDC(l, h, ad[j]);
18006
0
            SP_ASM_MUL_ADD(l, h, o2, mu, *md);
18007
0
            ad[j] = l;
18008
0
            o = h;
18009
0
        }
18010
        /* Handle overflow. */
18011
0
        SP_ASM_ADDC(o, o2, a->dp[m->used * 2 - 1]);
18012
0
        a->dp[m->used * 2 - 1] = o;
18013
0
        a->dp[m->used * 2] = o2;
18014
0
        a->used = (sp_size_t)(m->used * 2 + 1);
18015
0
    }
18016
18017
0
    if (!ct) {
18018
        /* Remove leading zeros. */
18019
0
        sp_clamp(a);
18020
0
        (void)sp_rshb(a, bits, a);
18021
        /* a = a mod m */
18022
0
        if (_sp_cmp_abs(a, m) != MP_LT) {
18023
0
            _sp_sub_off(a, m, a, 0);
18024
0
        }
18025
0
    }
18026
0
    else {
18027
0
        (void)sp_rshb(a, bits, a);
18028
        /* Constant time clamping. */
18029
0
        sp_clamp_ct(a);
18030
18031
0
        _sp_submod_ct(a, m, m, m->used + 1U, a);
18032
0
    }
18033
18034
#if 0
18035
    sp_print(a, "rr");
18036
#endif
18037
18038
0
    return MP_OKAY;
18039
0
#endif /* !SQR_MUL_ASM */
18040
0
}
18041
18042
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || \
18043
    (defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC))
18044
/* Reduce a number in Montgomery form.
18045
 *
18046
 * @param [in, out] a   SP integer to Montgomery reduce.
18047
 * @param [in]      m   SP integer that is the modulus.
18048
 * @param [in]      mp  SP integer digit that is the bottom digit of inv(-m).
18049
 * @param [in]      ct  Indicates operation must be constant time.
18050
 *
18051
 * @return  MP_OKAY on success.
18052
 * @return  MP_VAL when a or m is NULL or m is zero.
18053
 */
18054
int sp_mont_red_ex(sp_int* a, const sp_int* m, sp_int_digit mp, int ct)
18055
0
{
18056
0
    int err;
18057
18058
    /* Validate parameters. */
18059
0
    if ((a == NULL) || (m == NULL) || sp_iszero(m)) {
18060
0
        err = MP_VAL;
18061
0
    }
18062
#ifdef WOLFSSL_SP_INT_NEGATIVE
18063
    else if ((a->sign == MP_NEG) || (m->sign == MP_NEG)) {
18064
        err = MP_VAL;
18065
    }
18066
#endif
18067
    /* Ensure a has enough space for calculation. */
18068
0
    else if (a->size < m->used * 2 + 1) {
18069
0
        err = MP_VAL;
18070
0
    }
18071
0
    else {
18072
        /* Perform Montgomery Reduction. */
18073
0
        err = _sp_mont_red(a, m, mp, ct);
18074
0
    }
18075
18076
0
    return err;
18077
0
}
18078
#endif
18079
18080
/* Calculate the bottom digit of the inverse of negative m.
18081
 * (rho * m) mod 2^n = -1, where n is the number of bits in a digit.
18082
 *
18083
 * Used when performing Montgomery Reduction.
18084
 * m must be odd.
18085
 * Jeffrey Hurchalla's method.
18086
 *   https://arxiv.org/pdf/2204.04342.pdf
18087
 *
18088
 * @param [in]  m    SP integer that is the modulus.
18089
 * @param [out] rho  SP integer digit that is the bottom digit of inv(-m).
18090
 */
18091
static void _sp_mont_setup(const sp_int* m, sp_int_digit* rho)
18092
0
{
18093
0
    sp_int_digit d = m->dp[0];
18094
0
    sp_int_digit x = (3 * d) ^ 2;
18095
0
    sp_int_digit y = 1 - d * x;
18096
18097
0
#if SP_WORD_SIZE >= 16
18098
0
    x *= 1 + y; y *= y;
18099
0
#endif
18100
0
#if SP_WORD_SIZE >= 32
18101
0
    x *= 1 + y; y *= y;
18102
0
#endif
18103
0
#if SP_WORD_SIZE >= 64
18104
0
    x *= 1 + y; y *= y;
18105
0
#endif
18106
0
    x *= 1 + y;
18107
18108
    /* rho = -1/m mod d, subtract x (unsigned) from 0, assign negative */
18109
0
    *rho = (sp_int_digit)((sp_int_sdigit)0 - (sp_int_sdigit)x);
18110
0
}
18111
18112
/* Calculate the bottom digit of the inverse of negative m.
18113
 * (rho * m) mod 2^n = -1, where n is the number of bits in a digit.
18114
 *
18115
 * Used when performing Montgomery Reduction.
18116
 *
18117
 * @param [in]  m    SP integer that is the modulus.
18118
 * @param [out] rho  SP integer digit that is the bottom digit of inv(-m).
18119
 *
18120
 * @return  MP_OKAY on success.
18121
 * @return  MP_VAL when m or rho is NULL.
18122
 */
18123
int sp_mont_setup(const sp_int* m, sp_int_digit* rho)
18124
0
{
18125
0
    int err = MP_OKAY;
18126
18127
    /* Validate parameters. */
18128
0
    if ((m == NULL) || (rho == NULL)) {
18129
0
        err = MP_VAL;
18130
0
    }
18131
    /* Calculation only works with odd modulus. */
18132
0
    if ((err == MP_OKAY) && !sp_isodd(m)) {
18133
0
        err = MP_VAL;
18134
0
    }
18135
18136
0
    if (err == MP_OKAY) {
18137
        /* Calculate negative of inverse mod 2^n. */
18138
0
        _sp_mont_setup(m, rho);
18139
0
    }
18140
18141
0
    return err;
18142
0
}
18143
18144
/* Calculate the normalization value of m.
18145
 *   norm = 2^k - m, where k is the number of bits in m
18146
 *
18147
 * @param [out] norm  SP integer that normalizes numbers into Montgomery form.
18148
 * @param [in]  m     SP integer that is the modulus.
18149
 *
18150
 * @return  MP_OKAY on success.
18151
 * @return  MP_VAL when norm or m is NULL, or number of bits in m is maximal.
18152
 */
18153
int sp_mont_norm(sp_int* norm, const sp_int* m)
18154
0
{
18155
0
    int err = MP_OKAY;
18156
0
    unsigned int bits = 0;
18157
18158
    /* Validate parameters. */
18159
0
    if ((norm == NULL) || (m == NULL)) {
18160
0
        err = MP_VAL;
18161
0
    }
18162
0
    if (err == MP_OKAY) {
18163
        /* Find top bit and ensure norm has enough space. */
18164
0
        bits = (unsigned int)sp_count_bits(m);
18165
        /* NOLINTBEGIN(clang-analyzer-core.UndefinedBinaryOperatorResult) */
18166
        /* clang-tidy falsely believes that norm->size was corrupted by the
18167
         * _sp_copy() to "Set real working value to base." in _sp_exptmod_ex().
18168
         */
18169
0
        if (bits >= (unsigned int)norm->size * SP_WORD_SIZE) {
18170
0
            err = MP_VAL;
18171
0
        }
18172
        /* NOLINTEND(clang-analyzer-core.UndefinedBinaryOperatorResult) */
18173
0
    }
18174
0
    if (err == MP_OKAY) {
18175
        /* Round up for case when m is less than a word - no advantage in using
18176
         * a smaller mask and would take more operations.
18177
         */
18178
0
        if (bits < SP_WORD_SIZE) {
18179
0
            bits = SP_WORD_SIZE;
18180
0
        }
18181
        /* Smallest number greater than m of form 2^n. */
18182
0
        _sp_zero(norm);
18183
0
        err = sp_set_bit(norm, (int)bits);
18184
0
    }
18185
0
    if (err == MP_OKAY) {
18186
        /* norm = 2^n % m */
18187
0
        err = sp_sub(norm, m, norm);
18188
0
    }
18189
0
    if ((err == MP_OKAY) && (bits == SP_WORD_SIZE)) {
18190
        /* Sub made norm one word and now finish calculation. */
18191
0
        norm->dp[0] %= m->dp[0];
18192
0
    }
18193
0
    if (err == MP_OKAY) {
18194
        /* Remove leading zeros. */
18195
0
        sp_clamp(norm);
18196
0
    }
18197
18198
0
    return err;
18199
0
}
18200
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH ||
18201
        * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
18202
18203
/*********************************
18204
 * To and from binary and strings.
18205
 *********************************/
18206
18207
/* Calculate the number of 8-bit values required to represent the
18208
 * multi-precision number.
18209
 *
18210
 * When a is NULL, returns 0.
18211
 *
18212
 * @param [in] a  SP integer.
18213
 *
18214
 * @return  The count of 8-bit values.
18215
 * @return  0 when a is NULL.
18216
 */
18217
int sp_unsigned_bin_size(const sp_int* a)
18218
0
{
18219
0
    int cnt = 0;
18220
18221
0
    if (a != NULL) {
18222
0
        cnt = (sp_count_bits(a) + 7) >> 3;
18223
0
    }
18224
18225
0
    return cnt;
18226
0
}
18227
18228
/* Convert a number as an array of bytes in big-endian format to a
18229
 * multi-precision number.
18230
 *
18231
 * @param [out] a     SP integer.
18232
 * @param [in]  in    Array of bytes.
18233
 * @param [in]  inSz  Number of data bytes in array.
18234
 *
18235
 * @return  MP_OKAY on success.
18236
 * @return  MP_VAL when the number is too big to fit in an SP integer.
18237
 */
18238
int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz)
18239
0
{
18240
0
    int err = MP_OKAY;
18241
18242
    /* Validate parameters. */
18243
0
    if ((a == NULL) || ((in == NULL) && (inSz > 0))) {
18244
0
        err = MP_VAL;
18245
0
    }
18246
18247
    /* Check a has enough space for number. */
18248
0
    if ((err == MP_OKAY) && (inSz > (word32)a->size * SP_WORD_SIZEOF)) {
18249
0
        err = MP_VAL;
18250
0
    }
18251
18252
0
    if (err == MP_OKAY) {
18253
        /* Load full digits at a time from in. */
18254
0
        int i;
18255
0
        int j = 0;
18256
18257
0
        a->used = (sp_size_t)((inSz + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF);
18258
18259
    #if defined(BIG_ENDIAN_ORDER) && !defined(WOLFSSL_SP_INT_DIGIT_ALIGN)
18260
        /* Data endian matches representation of number.
18261
         * Directly copy if we don't have alignment issues.
18262
         */
18263
        for (i = (int)(inSz-1); i > SP_WORD_SIZEOF-1; i -= SP_WORD_SIZEOF) {
18264
            a->dp[j++] = *(sp_int_digit*)(in + i - (SP_WORD_SIZEOF - 1));
18265
        }
18266
    #else
18267
        /* Construct digit from required number of bytes. */
18268
0
        for (i = (int)(inSz-1); i >= SP_WORD_SIZEOF - 1; i -= SP_WORD_SIZEOF) {
18269
0
            a->dp[j]  = ((sp_int_digit)in[i - 0] <<  0)
18270
0
        #if SP_WORD_SIZE >= 16
18271
0
                      | ((sp_int_digit)in[i - 1] <<  8)
18272
0
        #endif
18273
0
        #if SP_WORD_SIZE >= 32
18274
0
                      | ((sp_int_digit)in[i - 2] << 16) |
18275
0
                        ((sp_int_digit)in[i - 3] << 24)
18276
0
        #endif
18277
0
        #if SP_WORD_SIZE >= 64
18278
0
                      | ((sp_int_digit)in[i - 4] << 32) |
18279
0
                        ((sp_int_digit)in[i - 5] << 40) |
18280
0
                        ((sp_int_digit)in[i - 6] << 48) |
18281
0
                        ((sp_int_digit)in[i - 7] << 56)
18282
0
        #endif
18283
0
                                                       ;
18284
0
            j++;
18285
0
        }
18286
0
    #endif
18287
18288
0
#if SP_WORD_SIZE >= 16
18289
        /* Handle leftovers. */
18290
0
        if (i >= 0) {
18291
    #ifdef BIG_ENDIAN_ORDER
18292
            int s;
18293
18294
            /* Place remaining bytes into last digit. */
18295
            a->dp[a->used - 1] = 0;
18296
            for (s = 0; i >= 0; i--,s += 8) {
18297
                a->dp[j] |= ((sp_int_digit)in[i]) << s;
18298
            }
18299
    #else
18300
            /* Cast digits to an array of bytes so we can insert directly. */
18301
0
            byte *d = (byte*)a->dp;
18302
18303
            /* Zero out all bytes in last digit. */
18304
0
            a->dp[a->used - 1] = 0;
18305
            /* Place remaining bytes directly into digit. */
18306
0
            switch (i) {
18307
0
            #if SP_WORD_SIZE >= 64
18308
0
                case 6: d[inSz - 1 - 6] = in[6]; FALL_THROUGH;
18309
0
                case 5: d[inSz - 1 - 5] = in[5]; FALL_THROUGH;
18310
0
                case 4: d[inSz - 1 - 4] = in[4]; FALL_THROUGH;
18311
0
                case 3: d[inSz - 1 - 3] = in[3]; FALL_THROUGH;
18312
0
            #endif
18313
0
            #if SP_WORD_SIZE >= 32
18314
0
                case 2: d[inSz - 1 - 2] = in[2]; FALL_THROUGH;
18315
0
                case 1: d[inSz - 1 - 1] = in[1]; FALL_THROUGH;
18316
0
            #endif
18317
0
                case 0: d[inSz - 1 - 0] = in[0];
18318
0
            }
18319
0
    #endif /* LITTLE_ENDIAN_ORDER */
18320
0
        }
18321
0
#endif
18322
0
        sp_clamp_ct(a);
18323
0
    }
18324
18325
0
    return err;
18326
0
}
18327
18328
/* Convert the multi-precision number to an array of bytes in big-endian format.
18329
 *
18330
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
18331
 * to calculate the number of bytes required.
18332
 *
18333
 * @param [in]  a    SP integer.
18334
 * @param [out] out  Array to put encoding into.
18335
 *
18336
 * @return  MP_OKAY on success.
18337
 * @return  MP_VAL when a or out is NULL.
18338
 */
18339
int sp_to_unsigned_bin(const sp_int* a, byte* out)
18340
0
{
18341
    /* Write assuming output buffer is big enough. */
18342
0
    return sp_to_unsigned_bin_len(a, out, sp_unsigned_bin_size(a));
18343
0
}
18344
18345
/* Convert the multi-precision number to an array of bytes in big-endian format.
18346
 *
18347
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
18348
 * to calculate the number of bytes required.
18349
 * Front-pads the output array with zeros to make number the size of the array.
18350
 *
18351
 * @param [in]  a      SP integer.
18352
 * @param [out] out    Array to put encoding into.
18353
 * @param [in]  outSz  Size of the array in bytes.
18354
 *
18355
 * @return  MP_OKAY on success.
18356
 * @return  MP_VAL when a or out is NULL.
18357
 */
18358
int sp_to_unsigned_bin_len(const sp_int* a, byte* out, int outSz)
18359
0
{
18360
0
    int err = MP_OKAY;
18361
18362
    /* Validate parameters. */
18363
0
    if ((a == NULL) || (out == NULL) || (outSz < 0)) {
18364
0
        err = MP_VAL;
18365
0
    }
18366
18367
0
#if SP_WORD_SIZE > 8
18368
0
    if (err == MP_OKAY) {
18369
        /* Start at the end of the buffer - least significant byte. */
18370
0
        int j = outSz - 1;
18371
18372
0
        if (!sp_iszero(a)) {
18373
0
            unsigned int i;
18374
18375
            /* Put each digit in. */
18376
0
            for (i = 0; (j >= 0) && (i < a->used); i++) {
18377
0
                int b;
18378
0
                sp_int_digit d = a->dp[i];
18379
                /* Place each byte of a digit into the buffer. */
18380
0
                for (b = 0; b < SP_WORD_SIZE; b += 8) {
18381
0
                    out[j--] = (byte)d;
18382
0
                    d >>= 8;
18383
                    /* Stop if the output buffer is filled. */
18384
0
                    if (j < 0) {
18385
0
                        if ((i < (unsigned int)a->used - 1) || (d > 0)) {
18386
0
                            err = MP_VAL;
18387
0
                        }
18388
0
                        break;
18389
0
                    }
18390
0
                }
18391
0
            }
18392
0
        }
18393
        /* Front pad buffer with 0s. */
18394
0
        for (; j >= 0; j--) {
18395
0
            out[j] = 0;
18396
0
        }
18397
0
    }
18398
#else
18399
    if ((err == MP_OKAY) && ((unsigned int)outSz < a->used)) {
18400
        err = MP_VAL;
18401
    }
18402
    if (err == MP_OKAY) {
18403
        unsigned int i;
18404
        int j;
18405
18406
        XMEMSET(out, 0, (unsigned int)outSz - a->used);
18407
18408
        for (i = 0, j = outSz - 1; i < a->used; i++, j--) {
18409
            out[j] = a->dp[i];
18410
        }
18411
    }
18412
#endif
18413
18414
0
    return err;
18415
0
}
18416
18417
/* Convert the multi-precision number to an array of bytes in big-endian format.
18418
 *
18419
 * Constant-time implementation.
18420
 *
18421
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
18422
 * to calculate the number of bytes required.
18423
 * Front-pads the output array with zeros to make number the size of the array.
18424
 *
18425
 * @param [in]  a      SP integer.
18426
 * @param [out] out    Array to put encoding into.
18427
 * @param [in]  outSz  Size of the array in bytes.
18428
 *
18429
 * @return  MP_OKAY on success.
18430
 * @return  MP_VAL when a or out is NULL, or outSz is negative.
18431
 */
18432
/* Writes exactly outSz bytes: the big-endian encoding of a, front-padded with
 * zeros. After parameter validation err is never changed, so a number wider
 * than outSz bytes is written truncated to its low-order bytes without an
 * error being reported.
 */
int sp_to_unsigned_bin_len_ct(const sp_int* a, byte* out, int outSz)
18433
0
{
18434
0
    int err = MP_OKAY;
18435
18436
    /* Validate parameters. */
18437
0
    if ((a == NULL) || (out == NULL) || (outSz < 0)) {
18438
0
        err = MP_VAL;
18439
0
    }
18440
18441
0
#if SP_WORD_SIZE > 8
18442
0
    if (err == MP_OKAY) {
18443
        /* Start at the end of the buffer - least significant byte. */
18444
0
        int j;
18445
0
        unsigned int i;
        /* All ones while digits of the number are being written; becomes zero
         * after the most significant digit so that padding bytes are 0. */
18446
0
        byte mask = (byte)-1;
18447
0
        sp_int_digit d;
18448
18449
        /* Put each digit in. The loop runs until every output byte has been
         * written, independent of how many digits a uses. */
18450
0
        i = 0;
18451
0
        for (j = outSz - 1; j >= 0; ) {
18452
0
            unsigned int b;
            /* NOTE(review): ctMaskLT is defined elsewhere - presumably all
             * ones when i is less than the index of the last used digit and
             * zero otherwise; confirm against its definition. */
18453
0
            volatile byte notFull = ctMaskLT((int)i, (int)a->used - 1);
18454
18455
0
            d = a->dp[i];
18456
            /* Place each byte of a digit into the buffer, masked so that
             * bytes written after the last digit are zero. */
18457
0
            for (b = 0; (j >= 0) && (b < SP_WORD_SIZEOF); b++) {
18458
0
                out[j--] = (byte)(d & mask);
18459
0
                d >>= 8;
18460
0
            }
            /* Once cleared the mask stays cleared and i stops advancing. */
18461
0
            mask &= notFull;
18462
0
            i += (unsigned int)(1 & mask);
18463
0
        }
18464
0
    }
18465
#else
18466
    if (err == MP_OKAY) {
18467
        int i;
18468
        int j;
        /* All ones until the last used digit has been written. */
18469
        volatile sp_int_digit mask = (sp_int_digit)-1;
18470
18471
        i = 0;
        /* One byte per digit: write every output byte, least significant
         * first from the end of the buffer. */
18472
        for (j = outSz - 1; j >= 0; j--) {
18473
            out[j] = a->dp[i] & mask;
            /* The comparison is 1 or 0, so 0 minus it is all ones or zero. */
18474
            mask &= (sp_int_digit)0 - (i < (int)a->used - 1);
18475
            i += 1 & mask;
18476
        }
18477
    }
18478
#endif
18479
18480
0
    return err;
18481
0
}
18482
18483
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
18484
    !defined(WOLFSSL_RSA_VERIFY_ONLY)
18485
/* Store the number in big-endian format in array at an offset.
18486
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
18487
 * to calculate the number of bytes required.
18488
 *
18489
 * @param [in]  o    Offset into array to start encoding.
18490
 * @param [in]  a    SP integer.
18491
 * @param [out] out  Array to put encoding into.
18492
 *
18493
 * @return  Index of next byte after data.
18494
 * @return  MP_VAL when a or out is NULL.
18495
 */
18496
int sp_to_unsigned_bin_at_pos(int o, const sp_int* a, unsigned char* out)
18497
0
{
18498
    /* Get length of data that will be written. */
18499
0
    int len = sp_unsigned_bin_size(a);
18500
    /* Write number to buffer at offset. */
18501
0
    int ret = sp_to_unsigned_bin_len(a, out + o, len);
18502
18503
0
    if (ret == MP_OKAY) {
18504
        /* Return offset of next byte after number. */
18505
0
        ret = o + len;
18506
0
    }
18507
18508
0
    return ret;
18509
0
}
18510
#endif /* WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY */
18511
18512
#ifdef WOLFSSL_SP_READ_RADIX_16
18513
/* Convert hexadecimal number as string in big-endian format to a
18514
 * multi-precision number.
18515
 *
18516
 * Assumes negative sign and leading zeros have been stripped.
18517
 *
18518
 * @param [out] a   SP integer.
18519
 * @param [in]  in  NUL terminated string.
18520
 *
18521
 * @return  MP_OKAY on success.
18522
 * @return  MP_VAL when a character is not valid or not enough space in a.
18523
 */
18524
static int _sp_read_radix_16(sp_int* a, const char* in)
18525
0
{
18526
0
    int err = MP_OKAY;
18527
0
    int i;
18528
0
    unsigned int s = 0;
18529
0
    sp_size_t j = 0;
18530
0
    sp_int_digit d;
18531
    /* Skip whitespace at end of line */
18532
0
    int eol_done = 0;
18533
18534
    /* Make all nibbles in digit 0. */
18535
0
    d = 0;
18536
    /* Step through string a character at a time starting at end - least
18537
     * significant byte. */
18538
0
    for (i = (int)(XSTRLEN(in) - 1); i >= 0; i--) {
18539
0
        volatile char c = in[i];
18540
        /* Convert character from hex. */
18541
0
        int ch = (int)HexCharToByte(c);
18542
        /* Check for invalid character. */
18543
0
        if (ch < 0) {
18544
0
            if (!eol_done && CharIsWhiteSpace(c))
18545
0
                continue;
18546
0
            err = MP_VAL;
18547
0
            break;
18548
0
        }
18549
0
        eol_done = 1;
18550
18551
        /* Check whether we have filled the digit. */
18552
0
        if (s == SP_WORD_SIZE) {
18553
            /* Store digit and move index to next in a. */
18554
0
            a->dp[j++] = d;
18555
            /* Fail if we are out of space in a. */
18556
0
            if (j >= a->size) {
18557
0
                err = MP_VAL;
18558
0
                break;
18559
0
            }
18560
            /* Set shift back to 0 - lowest nibble. */
18561
0
            s = 0;
18562
            /* Make all nibbles in digit 0. */
18563
0
            d = 0;
18564
0
        }
18565
18566
        /* Put next nibble into digit. */
18567
0
        d |= ((sp_int_digit)ch) << s;
18568
        /* Update shift for next nibble. */
18569
0
        s += 4;
18570
0
    }
18571
18572
0
    if (err == MP_OKAY) {
18573
        /* If space, store last digit. */
18574
0
        if (j < a->size) {
18575
0
            a->dp[j] = d;
18576
0
        }
18577
        /* Update used count. */
18578
0
        a->used = (sp_size_t)(j + 1U);
18579
        /* Remove leading zeros. */
18580
0
        sp_clamp(a);
18581
0
    }
18582
18583
0
    return err;
18584
0
}
18585
#endif /* WOLFSSL_SP_READ_RADIX_16 */
18586
18587
#ifdef WOLFSSL_SP_READ_RADIX_10
18588
/* Convert decimal number as string in big-endian format to a multi-precision
18589
 * number.
18590
 *
18591
 * Assumes negative sign and leading zeros have been stripped.
18592
 *
18593
 * @param [out] a   SP integer.
18594
 * @param [in]  in  NUL terminated string.
18595
 *
18596
 * @return  MP_OKAY on success.
18597
 * @return  MP_VAL when a character is not valid.
18598
 */
18599
static int _sp_read_radix_10(sp_int* a, const char* in)
18600
0
{
18601
0
    int  err = MP_OKAY;
18602
0
    int  i;
18603
18604
    /* Start with a being zero. */
18605
0
    _sp_zero(a);
18606
18607
    /* Process all characters. */
18608
0
    for (i = 0; in[i] != '\0'; i++) {
18609
        /* Get character. */
18610
0
        volatile char ch = in[i];
18611
        /* Check character is valid. */
18612
0
        if ((ch >= '0') && (ch <= '9')) {
18613
            /* Assume '0'..'9' are continuous values as characters. */
18614
0
            ch = (char)(ch - '0');
18615
0
        }
18616
0
        else {
18617
0
            if (CharIsWhiteSpace(ch))
18618
0
                continue;
18619
            /* Return error on invalid character. */
18620
0
            err = MP_VAL;
18621
0
            break;
18622
0
        }
18623
18624
        /* Multiply a by 10. */
18625
0
        err = _sp_mul_d(a, 10, a, 0);
18626
0
        if (err != MP_OKAY) {
18627
0
            break;
18628
0
        }
18629
        /* Add character value. */
18630
0
        err = _sp_add_d(a, (sp_int_digit)ch, a);
18631
0
        if (err != MP_OKAY) {
18632
0
            break;
18633
0
        }
18634
0
    }
18635
18636
0
    return err;
18637
0
}
18638
#endif /* WOLFSSL_SP_READ_RADIX_10 */
18639
18640
#if defined(WOLFSSL_SP_READ_RADIX_16) || defined(WOLFSSL_SP_READ_RADIX_10)
18641
/* Convert a number as string in big-endian format to a big number.
18642
 * Only supports base-16 (hexadecimal) and base-10 (decimal).
18643
 *
18644
 * Negative values supported when WOLFSSL_SP_INT_NEGATIVE is defined.
18645
 *
18646
 * @param [out] a      SP integer.
18647
 * @param [in]  in     NUL terminated string.
18648
 * @param [in]  radix  Number of values in a digit.
18649
 *
18650
 * @return  MP_OKAY on success.
18651
 * @return  MP_VAL when a or in is NULL, radix is not supported, value is
18652
 *          negative, or a character is not valid.
18653
 */
18654
int sp_read_radix(sp_int* a, const char* in, int radix)
18655
0
{
18656
0
    int err = MP_OKAY;
18657
#ifdef WOLFSSL_SP_INT_NEGATIVE
18658
    sp_uint8 sign = MP_ZPOS;
18659
#endif
18660
18661
0
    if ((a == NULL) || (in == NULL)) {
18662
0
        err = MP_VAL;
18663
0
    }
18664
18665
0
    if (err == MP_OKAY) {
18666
0
    #ifndef WOLFSSL_SP_INT_NEGATIVE
18667
0
        if (*in == '-') {
18668
0
            err = MP_VAL;
18669
0
        }
18670
0
        else
18671
0
    #endif
18672
0
        {
18673
        #ifdef WOLFSSL_SP_INT_NEGATIVE
18674
            if (*in == '-') {
18675
                /* Make number negative if signed string. */
18676
                sign = MP_NEG;
18677
                in++;
18678
            }
18679
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
18680
            /* Skip leading zeros. */
18681
0
            while (*in == '0') {
18682
0
                in++;
18683
0
            }
18684
18685
0
            if (radix == 16) {
18686
0
                err = _sp_read_radix_16(a, in);
18687
0
            }
18688
0
        #ifdef WOLFSSL_SP_READ_RADIX_10
18689
0
            else if (radix == 10) {
18690
0
                err = _sp_read_radix_10(a, in);
18691
0
            }
18692
0
        #endif
18693
0
            else {
18694
0
                err = MP_VAL;
18695
0
            }
18696
18697
        #ifdef WOLFSSL_SP_INT_NEGATIVE
18698
            /* Ensure not negative when zero. */
18699
            if (err == MP_OKAY) {
18700
                if (sp_iszero(a)) {
18701
                    a->sign = MP_ZPOS;
18702
                }
18703
                else {
18704
                    a->sign = sign;
18705
                }
18706
            }
18707
        #endif
18708
0
        }
18709
0
    }
18710
18711
0
    return err;
18712
0
}
18713
#endif /* WOLFSSL_SP_READ_RADIX_16 || WOLFSSL_SP_READ_RADIX_10 */
18714
18715
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
18716
    defined(WC_MP_TO_RADIX)
18717
/* Put the big-endian, hex string encoding of a into str.
18718
 *
18719
 * Assumes str is large enough for result.
18720
 * Use sp_radix_size() to calculate required length.
18721
 *
18722
 * @param [in]  a    SP integer to convert.
18723
 * @param [out] str  String to hold hex string result.
18724
 *
18725
 * @return  MP_OKAY on success.
18726
 * @return  MP_VAL when a or str is NULL.
18727
 */
18728
int sp_tohex(const sp_int* a, char* str)
18729
0
{
18730
0
    int err = MP_OKAY;
18731
18732
    /* Validate parameters. */
18733
0
    if ((a == NULL) || (str == NULL)) {
18734
0
        err = MP_VAL;
18735
0
    }
18736
18737
0
    if (err == MP_OKAY) {
18738
        /* Quick out if number is zero. */
18739
0
        if (sp_iszero(a) == MP_YES) {
18740
0
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
18741
            /* Make string represent complete bytes. */
18742
0
            *str++ = '0';
18743
0
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
18744
0
            *str++ = '0';
18745
0
        }
18746
0
        else {
18747
0
            int i;
18748
0
            int j;
18749
0
            sp_int_digit d;
18750
18751
        #ifdef WOLFSSL_SP_INT_NEGATIVE
18752
            if (a->sign == MP_NEG) {
18753
                /* Add negative sign character. */
18754
                *str = '-';
18755
                str++;
18756
            }
18757
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
18758
18759
            /* Start at last digit - most significant digit. */
18760
0
            i = (int)(a->used - 1);
18761
0
            d = a->dp[i];
18762
0
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
18763
            /* Find highest non-zero byte in most-significant word. */
18764
0
            for (j = SP_WORD_SIZE - 8; j >= 0 && i >= 0; j -= 8) {
18765
                /* When a byte at this index is not 0 break out to start
18766
                 * writing.
18767
                 */
18768
0
                if (((d >> j) & 0xff) != 0) {
18769
0
                    break;
18770
0
                }
18771
                /* Skip this digit if it was 0. */
18772
0
                if (j == 0) {
18773
0
                    j = SP_WORD_SIZE - 8;
18774
0
                    d = a->dp[--i];
18775
0
                }
18776
0
            }
18777
            /* Start with high nibble of byte. */
18778
0
            j += 4;
18779
        #else
18780
            /* Find highest non-zero nibble in most-significant word. */
18781
            for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
18782
                /* When a nibble at this index is not 0 break out to start
18783
                 * writing.
18784
                 */
18785
                if (((d >> j) & 0xf) != 0) {
18786
                    break;
18787
                }
18788
                /* Skip this digit if it was 0. */
18789
                if (j == 0) {
18790
                    j = SP_WORD_SIZE - 4;
18791
                    d = a->dp[--i];
18792
                }
18793
            }
18794
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
18795
            /* Write out as much as required from most-significant digit. */
18796
0
            for (; j >= 0; j -= 4) {
18797
0
                *(str++) = ByteToHex((byte)(d >> j));
18798
0
            }
18799
            /* Write rest of digits. */
18800
0
            for (--i; i >= 0; i--) {
18801
                /* Get digit from memory. */
18802
0
                d = a->dp[i];
18803
                /* Write out all nibbles of digit. */
18804
0
                for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
18805
0
                    *(str++) = (char)ByteToHex((byte)(d >> j));
18806
0
                }
18807
0
            }
18808
0
        }
18809
        /* Terminate string. */
18810
0
        *str = '\0';
18811
0
    }
18812
18813
0
    return err;
18814
0
}
18815
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
18816
18817
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
18818
    defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
18819
    defined(WC_MP_TO_RADIX)
18820
/* Put the big-endian, decimal string encoding of a into str.
18821
 *
18822
 * Assumes str is large enough for result.
18823
 * Use sp_radix_size() to calculate required length.
18824
 *
18825
 * @param [in]  a    SP integer to convert.
18826
 * @param [out] str  String to hold decimal string result.
18827
 *
18828
 * @return  MP_OKAY on success.
18829
 * @return  MP_VAL when a or str is NULL.
18830
 * @return  MP_MEM when dynamic memory allocation fails.
18831
 */
18832
int sp_todecimal(const sp_int* a, char* str)
18833
0
{
18834
0
    int err = MP_OKAY;
18835
0
    int i;
18836
0
    int j;
18837
0
    sp_int_digit d = 0;
18838
18839
    /* Validate parameters. */
18840
0
    if ((a == NULL) || (str == NULL)) {
18841
0
        err = MP_VAL;
18842
0
    }
18843
    /* Quick out if number is zero. */
18844
0
    else if (sp_iszero(a) == MP_YES) {
18845
0
        *str++ = '0';
18846
0
        *str = '\0';
18847
0
    }
18848
0
    else if (a->used >= SP_INT_DIGITS) {
18849
0
        err = MP_VAL;
18850
0
    }
18851
0
    else {
18852
        /* Temporary that is divided by 10. */
18853
0
        DECL_SP_INT(t, a->used + 1);
18854
18855
0
        ALLOC_SP_INT_SIZE(t, a->used + 1, err, NULL);
18856
0
        if (err == MP_OKAY) {
18857
0
            _sp_copy(a, t);
18858
0
        }
18859
0
        if (err == MP_OKAY) {
18860
        #ifdef WOLFSSL_SP_INT_NEGATIVE
18861
            if (a->sign == MP_NEG) {
18862
                /* Add negative sign character. */
18863
                *str = '-';
18864
                str++;
18865
            }
18866
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
18867
18868
            /* Write out little endian. */
18869
0
            i = 0;
18870
0
            do {
18871
                /* Divide by 10 and get remainder of division. */
18872
0
                (void)sp_div_d(t, 10, t, &d);
18873
                /* Write out remainder as a character. */
18874
0
                str[i++] = (char)('0' + d);
18875
0
            }
18876
            /* Keep going while we there is a value to write. */
18877
0
            while (!sp_iszero(t));
18878
            /* Terminate string. */
18879
0
            str[i] = '\0';
18880
18881
0
            if (err == MP_OKAY) {
18882
                /* Reverse string to big endian. */
18883
0
                for (j = 0; j <= (i - 1) / 2; j++) {
18884
0
                    int c = (unsigned char)str[j];
18885
0
                    str[j] = str[i - 1 - j];
18886
0
                    str[i - 1 - j] = (char)c;
18887
0
                }
18888
0
            }
18889
0
        }
18890
18891
0
        FREE_SP_INT(t, NULL);
18892
0
    }
18893
18894
0
    return err;
18895
0
}
18896
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
        * WOLFSSL_KEY_GEN || HAVE_COMP_KEY || WC_MP_TO_RADIX */
18897
18898
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
18899
    defined(WC_MP_TO_RADIX)
18900
/* Put the string version, big-endian, of a in str using the given radix.
18901
 *
18902
 * @param [in]  a      SP integer to convert.
18903
 * @param [out] str    String to hold radix based string result.
18904
 * @param [in]  radix  Base of character.
18905
 *                     Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
18906
 *
18907
 * @return  MP_OKAY on success.
18908
 * @return  MP_VAL when a or str is NULL, or radix is not supported.
18909
 */
18910
int sp_toradix(const sp_int* a, char* str, int radix)
18911
0
{
18912
0
    int err = MP_OKAY;
18913
18914
    /* Validate parameters. */
18915
0
    if ((a == NULL) || (str == NULL)) {
18916
0
        err = MP_VAL;
18917
0
    }
18918
    /* Handle base 16 if requested. */
18919
0
    else if (radix == MP_RADIX_HEX) {
18920
0
        err = sp_tohex(a, str);
18921
0
    }
18922
0
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
18923
0
    defined(HAVE_COMP_KEY)
18924
    /* Handle base 10 if requested. */
18925
0
    else if (radix == MP_RADIX_DEC) {
18926
0
        err = sp_todecimal(a, str);
18927
0
    }
18928
0
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
18929
0
    else {
18930
        /* Base not supported. */
18931
0
        err = MP_VAL;
18932
0
    }
18933
18934
0
    return err;
18935
0
}
18936
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
18937
18938
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
18939
    defined(WC_MP_TO_RADIX)
18940
/* Calculate the length of the string version, big-endian, of a using the given
18941
 * radix.
18942
 *
18943
 * @param [in]  a      SP integer to convert.
18944
 * @param [in]  radix  Base of character.
18945
 *                     Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
18946
 * @param [out] size   The number of characters in encoding.
18947
 *
18948
 * @return  MP_OKAY on success.
18949
 * @return  MP_VAL when a or size is NULL, or radix is not supported.
18950
 */
18951
int sp_radix_size(const sp_int* a, int radix, int* size)
18952
0
{
18953
0
    int err = MP_OKAY;
18954
18955
    /* Validate parameters. */
18956
0
    if ((a == NULL) || (size == NULL)) {
18957
0
        err = MP_VAL;
18958
0
    }
18959
    /* Handle base 16 if requested. */
18960
0
    else if (radix == MP_RADIX_HEX) {
18961
0
        if (a->used == 0) {
18962
0
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
18963
            /* 00 and '\0' */
18964
0
            *size = 2 + 1;
18965
        #else
18966
            /* Zero and '\0' */
18967
            *size = 1 + 1;
18968
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
18969
0
        }
18970
0
        else {
18971
            /* Count of nibbles. */
18972
0
            int cnt = (sp_count_bits(a) + 3) >> 2;
18973
0
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
18974
            /* Must have even number of nibbles to have complete bytes. */
18975
0
            if (cnt & 1) {
18976
0
                cnt++;
18977
0
            }
18978
0
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
18979
        #ifdef WOLFSSL_SP_INT_NEGATIVE
18980
            /* Add to count of characters for negative sign. */
18981
            if (a->sign == MP_NEG) {
18982
                cnt++;
18983
            }
18984
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
18985
            /* One more for \0 */
18986
0
            *size = cnt + 1;
18987
0
        }
18988
0
    }
18989
0
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
18990
0
    defined(HAVE_COMP_KEY)
18991
    /* Handle base 10 if requested. */
18992
0
    else if (radix == MP_RADIX_DEC) {
18993
0
        int i;
18994
0
        sp_int_digit d;
18995
18996
        /* quick out if its zero */
18997
0
        if (sp_iszero(a) == MP_YES) {
18998
            /* Zero and '\0' */
18999
0
            *size = 1 + 1;
19000
0
        }
19001
0
        else {
19002
0
            DECL_SP_INT(t, a->used);
19003
19004
            /* Temporary to be divided by 10. */
19005
0
            ALLOC_SP_INT(t, a->used, err, NULL);
19006
0
            if (err == MP_OKAY) {
19007
0
                t->size = a->used;
19008
0
                _sp_copy(a, t);
19009
0
            }
19010
19011
0
            if (err == MP_OKAY) {
19012
                /* Count number of times number can be divided by 10. */
19013
0
                for (i = 0; !sp_iszero(t); i++) {
19014
0
                    (void)sp_div_d(t, 10, t, &d);
19015
0
                }
19016
            #ifdef WOLFSSL_SP_INT_NEGATIVE
19017
                /* Add to count of characters for negative sign. */
19018
                if (a->sign == MP_NEG) {
19019
                    i++;
19020
                }
19021
            #endif /* WOLFSSL_SP_INT_NEGATIVE */
19022
                /* One more for \0 */
19023
0
                *size = i + 1;
19024
0
            }
19025
19026
0
            FREE_SP_INT(t, NULL);
19027
0
        }
19028
0
    }
19029
0
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
19030
0
    else {
19031
        /* Base not supported. */
19032
0
        err = MP_VAL;
19033
0
    }
19034
19035
0
    return err;
19036
0
}
19037
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
19038
19039
/***************************************
19040
 * Prime number generation and checking.
19041
 ***************************************/
19042
19043
#if defined(WOLFSSL_KEY_GEN) && (!defined(NO_RSA) || !defined(NO_DH) || \
19044
    !defined(NO_DSA)) && !defined(WC_NO_RNG)
19045
#ifndef WOLFSSL_SP_MILLER_RABIN_CNT
/* Number of Miller-Rabin iterations performed when checking the primality of
 * a candidate during prime generation. Defaults to 8 unless already defined.
 */
#define WOLFSSL_SP_MILLER_RABIN_CNT     8
#endif
19051
19052
/* Generate a random prime for RSA only.
19053
 *
19054
 * @param [out] r     SP integer to hold result.
19055
 * @param [in]  len   Number of bytes in prime. Use -ve to indicate the two
19056
 *                    lowest bits must be set.
19057
 * @param [in]  rng   Random number generator.
19058
 * @param [in]  heap  Heap hint. Unused.
19059
 *
19060
 * @return  MP_OKAY on success.
19061
 * @return  MP_VAL when r or rng is NULL, length is not supported or random
19062
 *          number generator fails.
19063
 */
19064
int sp_rand_prime(sp_int* r, int len, WC_RNG* rng, void* heap)
19065
{
19066
    static const byte USE_BBS = 3;
19067
    int  err = MP_OKAY;
19068
    byte low_bits = 1;
19069
    int  isPrime = MP_NO;
19070
#if defined(WOLFSSL_SP_MATH_ALL) || defined(BIG_ENDIAN_ORDER)
19071
    int  bits = 0;
19072
#endif /* WOLFSSL_SP_MATH_ALL */
19073
    unsigned int digits = 0;
19074
19075
    (void)heap;
19076
19077
    /* Check NULL parameters and 0 is not prime so 0 bytes is invalid. */
19078
    if ((r == NULL) || (rng == NULL) || (len == 0)) {
19079
        err = MP_VAL;
19080
    }
19081
19082
    if (err == MP_OKAY) {
19083
        /* Get type. */
19084
        if (len < 0) {
19085
            low_bits = USE_BBS;
19086
            len = -len;
19087
        }
19088
19089
        /* Get number of digits required to handle required number of bytes. */
19090
        digits = ((unsigned int)len + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
19091
        /* Ensure result has space. */
19092
        if (r->size < digits) {
19093
            err = MP_VAL;
19094
        }
19095
    }
19096
19097
    if (err == MP_OKAY) {
19098
    #ifndef WOLFSSL_SP_MATH_ALL
19099
        /* For minimal maths, support only what's in SP and needed for DH. */
19100
    #if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
19101
        if (len == 32) {
19102
        }
19103
        else
19104
    #endif /* WOLFSSL_HAVE_SP_DH && WOLFSSL_KEY_GEN */
19105
        /* Generate RSA primes that are half the modulus length. */
19106
    #ifdef WOLFSSL_SP_4096
19107
        if (len == 256) {
19108
            /* Support 2048-bit operations compiled in. */
19109
        }
19110
        else
19111
    #endif
19112
    #ifndef WOLFSSL_SP_NO_3072
19113
        if (len == 192) {
19114
            /* Support 1536-bit operations compiled in. */
19115
        }
19116
        else
19117
    #endif
19118
    #ifndef WOLFSSL_SP_NO_2048
19119
        if (len == 128) {
19120
            /* Support 1024-bit operations compiled in. */
19121
        }
19122
        else
19123
    #endif
19124
        {
19125
            /* Bit length not supported in SP. */
19126
            err = MP_VAL;
19127
        }
19128
    #endif /* !WOLFSSL_SP_MATH_ALL */
19129
19130
    #ifdef WOLFSSL_SP_INT_NEGATIVE
19131
        /* Generated number is always positive. */
19132
        r->sign = MP_ZPOS;
19133
    #endif /* WOLFSSL_SP_INT_NEGATIVE */
19134
        /* Set number of digits that will be used. */
19135
        r->used = (sp_size_t)digits;
19136
    #if defined(WOLFSSL_SP_MATH_ALL) || defined(BIG_ENDIAN_ORDER)
19137
        /* Calculate number of bits in last digit. */
19138
        bits = (len * 8) & SP_WORD_MASK;
19139
    #endif /* WOLFSSL_SP_MATH_ALL || BIG_ENDIAN_ORDER */
19140
    }
19141
19142
    /* Assume the candidate is probably prime and then test until it is proven
19143
     * composite.
19144
     */
19145
    while ((err == MP_OKAY) && (isPrime == MP_NO)) {
19146
#ifdef SHOW_GEN
19147
        printf(".");
19148
        fflush(stdout);
19149
#endif /* SHOW_GEN */
19150
        /* Generate bytes into digit array. */
19151
        err = wc_RNG_GenerateBlock(rng, (byte*)r->dp, (word32)len);
19152
        if (err != 0) {
19153
            err = MP_VAL;
19154
            break;
19155
        }
19156
19157
        /* Set top bits to ensure bit length required is generated.
19158
         * Also set second top to help ensure product of two primes is
19159
         * going to be twice the number of bits of each.
19160
         */
19161
#ifdef LITTLE_ENDIAN_ORDER
19162
        ((byte*)r->dp)[len-1]             |= 0x80 | 0x40;
19163
#else
19164
        ((byte*)(r->dp + r->used - 1))[0] |= 0x80 | 0x40;
19165
#endif /* LITTLE_ENDIAN_ORDER */
19166
19167
#ifdef BIG_ENDIAN_ORDER
19168
        /* Bytes were put into wrong place when less than full digit. */
19169
        if (bits != 0) {
19170
            r->dp[r->used - 1] >>= SP_WORD_SIZE - bits;
19171
        }
19172
#endif /* BIG_ENDIAN_ORDER */
19173
#ifdef WOLFSSL_SP_MATH_ALL
19174
        /* Mask top digit when less than a digit requested. */
19175
        if (bits > 0) {
19176
            r->dp[r->used - 1] &= ((sp_int_digit)1 << bits) - 1;
19177
        }
19178
#endif /* WOLFSSL_SP_MATH_ALL */
19179
        /* Set mandatory low bits
19180
         *  - bottom bit to make odd.
19181
         *  - For BBS, second lowest too to make Blum integer (3 mod 4).
19182
         */
19183
        r->dp[0] |= low_bits;
19184
19185
        /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
19186
         * of a 1024-bit candidate being a false positive, when it is our
19187
         * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
19188
         */
19189
        err = sp_prime_is_prime_ex(r, WOLFSSL_SP_MILLER_RABIN_CNT, &isPrime,
19190
            rng);
19191
    }
19192
19193
    return err;
19194
}
19195
#endif /* WOLFSSL_KEY_GEN && (!NO_RSA || !NO_DH || !NO_DSA) && !WC_NO_RNG */
19196
19197
#ifdef WOLFSSL_SP_PRIME_GEN
19198
/* Miller-Rabin test of "a" to the base of "b" as described in
19199
 * HAC pp. 139 Algorithm 4.24
19200
 *
19201
 * Sets result to 0 if definitely composite or 1 if probably prime.
19202
 * The chance of a random error is no more than 1/4 and often very much lower.
19203
 *
19204
 * a is assumed to be odd.
19205
 *
19206
 * @param [in]      a       SP integer to check.
19207
 * @param [in, out] b       SP integer that is a small prime or random value.
19208
 * @param [out]     result  MP_YES when number is likely prime.
19209
 *                          MP_NO otherwise.
19210
 * @param [out]      n1     SP integer temporary.
19211
 * @param [out]      r      SP integer temporary.
19212
 *
19213
 * @return  MP_OKAY on success.
19214
 * @return  MP_MEM when dynamic memory allocation fails.
19215
 */
19216
static int sp_prime_miller_rabin(const sp_int* a, sp_int* b, int* result,
19217
    sp_int* n1, sp_int* r)
19218
0
{
19219
0
    int err = MP_OKAY;
19220
0
    int s = 0;
19221
0
    sp_int* y = b;
19222
19223
    /* Assume not prime. */
19224
0
    *result = MP_NO;
19225
19226
    /* Ensure small prime is 2 or more. */
19227
0
    if (sp_cmp_d(b, 1) != MP_GT) {
19228
0
        err = MP_VAL;
19229
0
    }
19230
0
    if (err == MP_OKAY) {
19231
        /* n1 = a - 1 (a is assumed odd.) */
19232
0
        (void)sp_copy(a, n1);
19233
0
        n1->dp[0]--;
19234
19235
        /* Set 2**s * r = n1 */
19236
        /* Count the number of least significant bits which are zero. */
19237
0
        s = sp_cnt_lsb(n1);
19238
        /* Divide n - 1 by 2**s into r. */
19239
0
        (void)sp_rshb(n1, s, r);
19240
19241
        /* Compute y = b**r mod a */
19242
0
        err = sp_exptmod(b, r, a, y);
19243
0
    }
19244
0
    if (err == MP_OKAY) {
19245
        /* Assume probably prime until shown otherwise. */
19246
0
        *result = MP_YES;
19247
19248
        /* If y != 1 and y != n1 do */
19249
0
        if ((sp_cmp_d(y, 1) != MP_EQ) && (_sp_cmp(y, n1) != MP_EQ)) {
19250
0
            int j = 1;
19251
            /* While j <= s-1 and y != n1 */
19252
0
            while ((j <= (s - 1)) && (_sp_cmp(y, n1) != MP_EQ)) {
19253
                /* Square for bit shifted down. */
19254
0
                err = sp_sqrmod(y, a, y);
19255
0
                if (err != MP_OKAY) {
19256
0
                    break;
19257
0
                }
19258
19259
                /* If y == 1 then composite. */
19260
0
                if (sp_cmp_d(y, 1) == MP_EQ) {
19261
0
                    *result = MP_NO;
19262
0
                    break;
19263
0
                }
19264
0
                ++j;
19265
0
            }
19266
19267
            /* If y != n1 then composite. */
19268
0
            if ((*result == MP_YES) && (_sp_cmp(y, n1) != MP_EQ)) {
19269
0
                *result = MP_NO;
19270
0
            }
19271
0
        }
19272
0
    }
19273
19274
0
    return err;
19275
0
}
19276
19277
#if SP_WORD_SIZE == 8
/* Number of pre-computed primes. First n primes - fitting in a digit. */
#define SP_PRIME_SIZE      54

/* The first 54 primes (2 .. 0xFB) - each fits in an 8-bit digit.
 * Used for small prime comparison, trial division and as Miller-Rabin bases.
 */
static const sp_int_digit sp_primes[SP_PRIME_SIZE] = {
    0x02, 0x03, 0x05, 0x07, 0x0B, 0x0D, 0x11, 0x13,
    0x17, 0x1D, 0x1F, 0x25, 0x29, 0x2B, 0x2F, 0x35,
    0x3B, 0x3D, 0x43, 0x47, 0x49, 0x4F, 0x53, 0x59,
    0x61, 0x65, 0x67, 0x6B, 0x6D, 0x71, 0x7F, 0x83,
    0x89, 0x8B, 0x95, 0x97, 0x9D, 0xA3, 0xA7, 0xAD,
    0xB3, 0xB5, 0xBF, 0xC1, 0xC5, 0xC7, 0xD3, 0xDF,
    0xE3, 0xE5, 0xE9, 0xEF, 0xF1, 0xFB
};
#else
/* Number of pre-computed primes. First n primes. */
#define SP_PRIME_SIZE      256

/* The first 256 primes (2 .. 0x0653), stored as 16-bit values.
 * Used for small prime comparison, trial division and as Miller-Rabin bases.
 */
static const sp_uint16 sp_primes[SP_PRIME_SIZE] = {
    0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
    0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
    0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
    0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
    0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
    0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
    0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
    0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,

    0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
    0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
    0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
    0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
    0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
    0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
    0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
    0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,

    0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
    0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
    0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
    0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
    0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
    0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
    0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
    0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,

    0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
    0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
    0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
    0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
    0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
    0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
    0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
    0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
};
#endif
19333
19334
/* Compare the first n primes with a.
19335
 *
19336
 * @param [in]  a       Number to check.
19337
 * @param [out] result  Whether number was found to be prime.
19338
 * @return  0 when no small prime matches.
19339
 * @return  1 when small prime matches.
19340
 */
19341
static WC_INLINE int sp_cmp_primes(const sp_int* a, int* result)
19342
0
{
19343
0
    int i;
19344
0
    int haveRes = 0;
19345
19346
0
    *result = MP_NO;
19347
    /* Check one digit a against primes table. */
19348
0
    for (i = 0; i < SP_PRIME_SIZE; i++) {
19349
0
        if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) {
19350
0
            *result = MP_YES;
19351
0
            haveRes = 1;
19352
0
            break;
19353
0
        }
19354
0
    }
19355
19356
0
    return haveRes;
19357
0
}
19358
19359
/* Using composites is only faster when using 64-bit values. */
#if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE == 64)
/* Number of composites. */
#define SP_COMP_CNT     38

/* Products of small primes that fit into 64-bits.
 *
 * Read-only lookup table: reducing a number modulo one of these composites
 * gives a single digit that can be cheaply trial divided by the small primes
 * that make up the composite.
 */
static const sp_int_digit sp_comp[SP_COMP_CNT] = {
    0x088886ffdb344692, 0x34091fa96ffdf47b, 0x3c47d8d728a77ebb,
    0x077ab7da9d709ea9, 0x310df3e7bd4bc897, 0xe657d7a1fd5161d1,
    0x02ad3dbe0cca85ff, 0x0787f9a02c3388a7, 0x1113c5cc6d101657,
    0x2456c94f936bdb15, 0x4236a30b85ffe139, 0x805437b38eada69d,
    0x00723e97bddcd2af, 0x00a5a792ee239667, 0x00e451352ebca269,
    0x013a7955f14b7805, 0x01d37cbd653b06ff, 0x0288fe4eca4d7cdf,
    0x039fddb60d3af63d, 0x04cd73f19080fb03, 0x0639c390b9313f05,
    0x08a1c420d25d388f, 0x0b4b5322977db499, 0x0e94c170a802ee29,
    0x11f6a0e8356100df, 0x166c8898f7b3d683, 0x1babda0a0afd724b,
    0x2471b07c44024abf, 0x2d866dbc2558ad71, 0x3891410d45fb47df,
    0x425d5866b049e263, 0x51f767298e2cf13b, 0x6d9f9ece5fc74f13,
    0x7f5ffdb0f56ee64d, 0x943740d46a1bc71f, 0xaf2d7ca25cec848f,
    0xcec010484e4ad877, 0xef972c3cfafbcd25
};

/* Index of next prime after those used to create composite.
 *
 * Read-only lookup table: entry j is one past the index, into the table of
 * small primes, of the last prime that is a factor of composite j.
 */
static const int sp_comp_idx[SP_COMP_CNT] = {
     15,  25,  34,  42,  50,  58,  65,  72,  79,  86,  93, 100, 106, 112, 118,
    124, 130, 136, 142, 148, 154, 160, 166, 172, 178, 184, 190, 196, 202, 208,
    214, 220, 226, 232, 238, 244, 250, 256
};
#endif
19388
19389
/* Determines whether any of the first n small primes divide a evenly.
19390
 *
19391
 * @param [in]      a        Number to check.
19392
 * @param [in, out] haveRes  Boolean indicating a no prime result found.
19393
 * @param [in, out] result   Whether a is known to be prime.
19394
 * @return  MP_OKAY on success.
19395
 * @return  Negative on failure.
19396
 */
19397
static WC_INLINE int sp_div_primes(const sp_int* a, int* haveRes, int* result)
19398
0
{
19399
0
    int i;
19400
0
#if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE == 64)
19401
0
    int j;
19402
0
#endif
19403
0
    sp_int_digit d;
19404
0
    int err = MP_OKAY;
19405
19406
#if defined(WOLFSSL_SP_SMALL) || (SP_WORD_SIZE < 64)
19407
    /* Do trial division of a with all known small primes. */
19408
    for (i = 0; i < SP_PRIME_SIZE; i++) {
19409
        /* Small prime divides a when remainder is 0. */
19410
        err = sp_mod_d(a, (sp_int_digit)sp_primes[i], &d);
19411
        if ((err != MP_OKAY) || (d == 0)) {
19412
            *result = MP_NO;
19413
            *haveRes = 1;
19414
            break;
19415
        }
19416
    }
19417
#else
19418
    /* Start with first prime in composite. */
19419
0
    i = 0;
19420
0
    for (j = 0; (!(*haveRes)) && (j < SP_COMP_CNT); j++) {
19421
        /* Reduce a down to a single word.  */
19422
0
        err = sp_mod_d(a, sp_comp[j], &d);
19423
0
        if ((err != MP_OKAY) || (d == 0)) {
19424
0
            *result = MP_NO;
19425
0
            *haveRes = 1;
19426
0
            break;
19427
0
        }
19428
        /* Do trial division of d with small primes that make up composite. */
19429
0
        for (; i < sp_comp_idx[j]; i++) {
19430
            /* Small prime divides a when remainder is 0. */
19431
0
            if (d % sp_primes[i] == 0) {
19432
0
                *result = MP_NO;
19433
0
                *haveRes = 1;
19434
0
                break;
19435
0
            }
19436
0
        }
19437
0
    }
19438
0
#endif
19439
19440
0
    return err;
19441
0
}
19442
19443
/* Check whether a is prime by checking t iterations of Miller-Rabin.
19444
 *
19445
 * @param [in]  a       SP integer to check.
19446
 * @param [in]  trials  Number of trials of Miller-Rabin test to perform.
19447
 * @param [out] result  MP_YES when number is prime.
19448
 *                      MP_NO otherwise.
19449
 *
19450
 * @return  MP_OKAY on success.
19451
 * @return  MP_MEM when dynamic memory allocation fails.
19452
 */
19453
static int _sp_prime_trials(const sp_int* a, int trials, int* result)
19454
0
{
19455
0
    int err = MP_OKAY;
19456
0
    int i;
19457
0
    DECL_SP_INT(n1, a->used + 1);
19458
0
    DECL_SP_INT(r, a->used + 1);
19459
0
    DECL_SP_INT(b, a->used * 2 + 1);
19460
19461
0
    ALLOC_SP_INT(n1, a->used + 1, err, NULL);
19462
0
    ALLOC_SP_INT(r, a->used + 1, err, NULL);
19463
    /* Allocate number that will hold modular exponentiation result. */
19464
0
    ALLOC_SP_INT(b, a->used * 2 + 1, err, NULL);
19465
0
    if (err == MP_OKAY) {
19466
0
        _sp_init_size(n1, a->used + 1U);
19467
0
        _sp_init_size(r, a->used + 1U);
19468
0
        _sp_init_size(b, (sp_size_t)(a->used * 2U + 1U));
19469
19470
        /* Do requested number of trials of Miller-Rabin test. */
19471
0
        for (i = 0; i < trials; i++) {
19472
            /* Miller-Rabin test with known small prime. */
19473
0
            _sp_set(b, sp_primes[i]);
19474
0
            err = sp_prime_miller_rabin(a, b, result, n1, r);
19475
0
            if ((err != MP_OKAY) || (*result == MP_NO)) {
19476
0
                break;
19477
0
            }
19478
0
        }
19479
19480
        /* Clear temporary values. */
19481
0
        sp_clear(n1);
19482
0
        sp_clear(r);
19483
0
        sp_clear(b);
19484
0
    }
19485
19486
    /* Free allocated temporary. */
19487
0
    FREE_SP_INT(b, NULL);
19488
0
    FREE_SP_INT(r, NULL);
19489
0
    FREE_SP_INT(n1, NULL);
19490
0
    return err;
19491
0
}
19492
19493
/* Check whether a is prime.
19494
 * Checks against a number of small primes and does trials iterations of
19495
 * Miller-Rabin.
19496
 *
19497
 * @param [in]  a       SP integer to check.
19498
 * @param [in]  trials  Number of trials of Miller-Rabin test to perform.
19499
 * @param [out] result  MP_YES when number is prime.
19500
 *                      MP_NO otherwise.
19501
 *
19502
 * @return  MP_OKAY on success.
19503
 * @return  MP_VAL when a or result is NULL, or trials is out of range.
19504
 * @return  MP_MEM when dynamic memory allocation fails.
19505
 */
19506
int sp_prime_is_prime(const sp_int* a, int trials, int* result)
19507
0
{
19508
0
    int         err = MP_OKAY;
19509
0
    int         haveRes = 0;
19510
19511
    /* Validate parameters. */
19512
0
    if ((a == NULL) || (result == NULL)) {
19513
0
        if (result != NULL) {
19514
0
            *result = MP_NO;
19515
0
        }
19516
0
        err = MP_VAL;
19517
0
    }
19518
0
    else if (a->used * 2 >= SP_INT_DIGITS) {
19519
0
        err = MP_VAL;
19520
0
    }
19521
    /* Check validity of Miller-Rabin iterations count.
19522
     * Must do at least one and need a unique pre-computed prime for each
19523
     * iteration.
19524
     */
19525
0
    if ((err == MP_OKAY) && ((trials <= 0) || (trials > SP_PRIME_SIZE))) {
19526
0
        *result = MP_NO;
19527
0
        err = MP_VAL;
19528
0
    }
19529
19530
    /* Short-cut, 1 is not prime. */
19531
0
    if ((err == MP_OKAY) && sp_isone(a)) {
19532
0
        *result = MP_NO;
19533
0
        haveRes = 1;
19534
0
    }
19535
19536
0
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
19537
19538
    /* Check against known small primes when a has 1 digit. */
19539
0
    if ((err == MP_OKAY) && (!haveRes) && (a->used == 1) &&
19540
0
            (a->dp[0] <= sp_primes[SP_PRIME_SIZE - 1])) {
19541
0
        haveRes = sp_cmp_primes(a, result);
19542
0
    }
19543
19544
    /* Check all small primes for even divisibility. */
19545
0
    if ((err == MP_OKAY) && (!haveRes)) {
19546
0
        err = sp_div_primes(a, &haveRes, result);
19547
0
    }
19548
19549
    /* Check a number of iterations of Miller-Rabin with small primes. */
19550
0
    if ((err == MP_OKAY) && (!haveRes)) {
19551
0
        err = _sp_prime_trials(a, trials, result);
19552
0
    }
19553
19554
0
    RESTORE_VECTOR_REGISTERS();
19555
19556
0
    return err;
19557
0
}
19558
19559
#ifndef WC_NO_RNG
19560
/* Check whether a is prime by doing t iterations of Miller-Rabin.
19561
 *
19562
 * t random numbers should give a (1/4)^t chance of a false prime.
19563
 *
19564
 * @param [in]  a       SP integer to check.
19565
 * @param [in]  trials  Number of iterations of Miller-Rabin test to perform.
19566
 * @param [out] result  MP_YES when number is prime.
19567
 *                      MP_NO otherwise.
19568
 * @param [in]  rng     Random number generator for Miller-Rabin testing.
19569
 *
19570
 * @return  MP_OKAY on success.
19571
 * @return  MP_VAL when a, result or rng is NULL.
19572
 * @return  MP_MEM when dynamic memory allocation fails.
19573
 */
19574
static int _sp_prime_random_trials(const sp_int* a, int trials, int* result,
19575
    WC_RNG* rng)
19576
0
{
19577
0
    int err = MP_OKAY;
19578
0
    int bits = sp_count_bits(a);
19579
0
    word32 baseSz = ((word32)bits + 7) >> 3;
19580
0
    DECL_SP_INT_ARRAY(ds, a->used + 1, 2);
19581
0
    DECL_SP_INT_ARRAY(d, a->used * 2 + 1, 2);
19582
19583
0
    ALLOC_SP_INT_ARRAY(ds, a->used + 1, 2, err, NULL);
19584
0
    ALLOC_SP_INT_ARRAY(d, a->used * 2 + 1, 2, err, NULL);
19585
0
    if (err == MP_OKAY) {
19586
0
        sp_int* c  = ds[0];
19587
0
        sp_int* n1 = ds[1];
19588
0
        sp_int* b  = d[0];
19589
0
        sp_int* r  = d[1];
19590
19591
0
        _sp_init_size(c , a->used + 1U);
19592
0
        _sp_init_size(n1, a->used + 1U);
19593
0
        _sp_init_size(b , (sp_size_t)(a->used * 2U + 1U));
19594
0
        _sp_init_size(r , (sp_size_t)(a->used * 2U + 1U));
19595
19596
0
        _sp_sub_d(a, 2, c);
19597
19598
0
        bits &= SP_WORD_MASK;
19599
19600
        /* Keep trying random numbers until all trials complete. */
19601
0
        while (trials > 0) {
19602
            /* Generate random trial number. */
19603
0
            err = wc_RNG_GenerateBlock(rng, (byte*)b->dp, baseSz);
19604
0
            if (err != MP_OKAY) {
19605
0
                break;
19606
0
            }
19607
0
            b->used = a->used;
19608
        #ifdef BIG_ENDIAN_ORDER
19609
            /* Fix top digit if fewer bytes than a full digit generated. */
19610
            if (((baseSz * 8) & SP_WORD_MASK) != 0) {
19611
                b->dp[b->used-1] >>=
19612
                    SP_WORD_SIZE - ((baseSz * 8) & SP_WORD_MASK);
19613
            }
19614
        #endif /* BIG_ENDIAN_ORDER */
19615
19616
            /* Ensure the top word has no more bits than necessary. */
19617
0
            if (bits > 0) {
19618
0
                b->dp[b->used - 1] &= ((sp_int_digit)1 << bits) - 1;
19619
0
                sp_clamp(b);
19620
0
            }
19621
19622
            /* Can't use random value it is: 0, 1, 2, a-2, a-1, >= a  */
19623
0
            if ((sp_cmp_d(b, 2) != MP_GT) || (_sp_cmp(b, c) != MP_LT)) {
19624
0
                continue;
19625
0
            }
19626
19627
            /* Perform Miller-Rabin test with random value. */
19628
0
            err = sp_prime_miller_rabin(a, b, result, n1, r);
19629
0
            if ((err != MP_OKAY) || (*result == MP_NO)) {
19630
0
                break;
19631
0
            }
19632
19633
            /* Trial complete. */
19634
0
            trials--;
19635
0
        }
19636
19637
        /* Zeroize temporary values used when generating private prime. */
19638
0
        sp_forcezero(n1);
19639
0
        sp_forcezero(r);
19640
0
        sp_forcezero(b);
19641
0
        sp_forcezero(c);
19642
0
    }
19643
19644
0
    FREE_SP_INT_ARRAY(d, NULL);
19645
0
    FREE_SP_INT_ARRAY(ds, NULL);
19646
0
    return err;
19647
0
}
19648
#endif /*!WC_NO_RNG */
19649
19650
/* Check whether a is prime.
19651
 * Checks against a number of small primes and does trials iterations of
19652
 * Miller-Rabin.
19653
 *
19654
 * @param [in]  a       SP integer to check.
19655
 * @param [in]  trials  Number of iterations of Miller-Rabin test to perform.
19656
 * @param [out] result  MP_YES when number is prime.
19657
 *                      MP_NO otherwise.
19658
 * @param [in]  rng     Random number generator for Miller-Rabin testing.
19659
 *
19660
 * @return  MP_OKAY on success.
19661
 * @return  MP_VAL when a, result or rng is NULL.
19662
 * @return  MP_MEM when dynamic memory allocation fails.
19663
 */
19664
int sp_prime_is_prime_ex(const sp_int* a, int trials, int* result, WC_RNG* rng)
19665
0
{
19666
0
    int err = MP_OKAY;
19667
0
    int ret = MP_YES;
19668
0
    int haveRes = 0;
19669
19670
0
    if ((a == NULL) || (result == NULL) || (rng == NULL)) {
19671
0
        err = MP_VAL;
19672
0
    }
19673
0
#ifndef WC_NO_RNG
19674
0
    if ((err == MP_OKAY) && (a->used * 2 >= SP_INT_DIGITS)) {
19675
0
        err = MP_VAL;
19676
0
    }
19677
0
#endif
19678
#ifdef WOLFSSL_SP_INT_NEGATIVE
19679
    if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
19680
        err = MP_VAL;
19681
    }
19682
#endif
19683
19684
    /* Ensure trials is valid. Maximum based on number of small primes
19685
     * available. */
19686
0
    if ((err == MP_OKAY) && ((trials <= 0) || (trials > SP_PRIME_SIZE))) {
19687
0
        err = MP_VAL;
19688
0
    }
19689
19690
    /* A value of 1 is not prime. */
19691
0
    if ((err == MP_OKAY) && sp_isone(a)) {
19692
0
        ret = MP_NO;
19693
0
        haveRes = 1;
19694
0
    }
19695
19696
0
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
19697
19698
    /* Check against known small primes when a has 1 digit. */
19699
0
    if ((err == MP_OKAY) && (!haveRes) && (a->used == 1) &&
19700
0
            (a->dp[0] <= (sp_int_digit)sp_primes[SP_PRIME_SIZE - 1])) {
19701
0
        haveRes = sp_cmp_primes(a, &ret);
19702
0
    }
19703
19704
    /* Check all small primes for even divisibility. */
19705
0
    if ((err == MP_OKAY) && (!haveRes)) {
19706
0
        err = sp_div_primes(a, &haveRes, &ret);
19707
0
    }
19708
19709
0
#ifndef WC_NO_RNG
19710
    /* Check a number of iterations of Miller-Rabin with random large values. */
19711
0
    if ((err == MP_OKAY) && (!haveRes)) {
19712
0
        err = _sp_prime_random_trials(a, trials, &ret, rng);
19713
0
    }
19714
#else
19715
    (void)trials;
19716
#endif /* !WC_NO_RNG */
19717
19718
0
    if (result != NULL) {
19719
0
        *result = ret;
19720
0
    }
19721
19722
0
    RESTORE_VECTOR_REGISTERS();
19723
19724
0
    return err;
19725
0
}
19726
#endif /* WOLFSSL_SP_PRIME_GEN */
19727
19728
#if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)
19729
19730
/* Calculates the Greatest Common Divisor (GCD) of a and b into r.
19731
 *
19732
 * Find the largest number that divides both a and b without remainder.
19733
 * r <= a, r <= b, a % r == 0, b % r == 0
19734
 *
19735
 * a and b are positive integers.
19736
 *
19737
 * Euclidean Algorithm:
19738
 *  1. If a > b then a = b, b = a
19739
 *  2. u = a
19740
 *  3. v = b mod a
19741
 *  4. While v != 0
19742
 *   4.1. t = u mod v
19743
 *   4.2. u <= v, v <= t, t <= u
19744
 *  5. r = u
19745
 *
19746
 * @param [in]  a  SP integer of first operand.
19747
 * @param [in]  b  SP integer of second operand.
19748
 * @param [out] r  SP integer to hold result.
19749
 *
19750
 * @return  MP_OKAY on success.
19751
 * @return  MP_MEM when dynamic memory allocation fails.
19752
 */
19753
static WC_INLINE int _sp_gcd(const sp_int* a, const sp_int* b, sp_int* r)
19754
{
19755
    int err = MP_OKAY;
19756
    sp_int* u = NULL;
19757
    sp_int* v = NULL;
19758
    sp_int* t = NULL;
19759
    /* Used for swapping sp_ints. */
19760
    sp_int* s;
19761
    /* Determine maximum digit length numbers will reach. */
19762
    unsigned int used = (a->used >= b->used) ? a->used + 1U : b->used + 1U;
19763
    DECL_SP_INT_ARRAY(d, used, 3);
19764
19765
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
19766
19767
    ALLOC_SP_INT_ARRAY(d, used, 3, err, NULL);
19768
    if (err == MP_OKAY) {
19769
        u = d[0];
19770
        v = d[1];
19771
        t = d[2];
19772
19773
        _sp_init_size(u, used);
19774
        _sp_init_size(v, used);
19775
        _sp_init_size(t, used);
19776
19777
        /* 1. If a > b then a = b, b = a.
19778
         *    Make a <= b.
19779
         */
19780
        if (_sp_cmp(a, b) == MP_GT) {
19781
            const sp_int* tmp;
19782
            tmp = a;
19783
            a = b;
19784
            b = tmp;
19785
        }
19786
        /* 2. u = a */
19787
        _sp_copy(a, u);
19788
        /* 3. v = b mod a */
19789
        if (a->used == 1) {
19790
            err = sp_mod_d(b, a->dp[0], &v->dp[0]);
19791
            v->used = (v->dp[0] != 0);
19792
        }
19793
        else {
19794
            err = sp_mod(b, a, v);
19795
        }
19796
    }
19797
19798
    /* 4. While v != 0 */
19799
    /* Keep reducing larger by smaller until smaller is 0 or u and v both one
19800
     * digit.
19801
     */
19802
    while ((err == MP_OKAY) && (!sp_iszero(v)) && (u->used > 1)) {
19803
        /* u' = v, v' = u mod v */
19804
        /* 4.1 t = u mod v */
19805
        if (v->used == 1) {
19806
            err = sp_mod_d(u, v->dp[0], &t->dp[0]);
19807
            t->used = (t->dp[0] != 0);
19808
        }
19809
        else {
19810
            err = sp_mod(u, v, t);
19811
        }
19812
        /* 4.2. u <= v, v <= t, t <= u */
19813
        s = u; u = v; v = t; t = s;
19814
    }
19815
    /* Only one digit remaining in u and v. */
19816
    while ((err == MP_OKAY) && (!sp_iszero(v))) {
19817
        /* u' = v, v' = u mod v */
19818
        /* 4.1 t = u mod v */
19819
        t->dp[0] = u->dp[0] % v->dp[0];
19820
        t->used = (t->dp[0] != 0);
19821
        /* 4.2. u <= v, v <= t, t <= u */
19822
        s = u; u = v; v = t; t = s;
19823
    }
19824
    if (err == MP_OKAY) {
19825
        /* 5. r = u */
19826
        _sp_copy(u, r);
19827
    }
19828
19829
    FREE_SP_INT_ARRAY(d, NULL);
19830
19831
    RESTORE_VECTOR_REGISTERS();
19832
19833
    return err;
19834
}
19835
19836
/* Calculates the Greatest Common Divisor (GCD) of a and b into r.
19837
 *
19838
 * Find the largest number that divides both a and b without remainder.
19839
 * r <= a, r <= b, a % r == 0, b % r == 0
19840
 *
19841
 * a and b are positive integers.
19842
 *
19843
 * @param [in]  a  SP integer of first operand.
19844
 * @param [in]  b  SP integer of second operand.
19845
 * @param [out] r  SP integer to hold result.
19846
 *
19847
 * @return  MP_OKAY on success.
19848
 * @return  MP_VAL when a, b or r is NULL or too large.
19849
 * @return  MP_MEM when dynamic memory allocation fails.
19850
 */
19851
int sp_gcd(const sp_int* a, const sp_int* b, sp_int* r)
19852
{
19853
    int err = MP_OKAY;
19854
19855
    /* Validate parameters. */
19856
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
19857
        err = MP_VAL;
19858
    }
19859
    /* Check that we have space in numbers to do work. */
19860
    else if ((a->used >= SP_INT_DIGITS) || (b->used >= SP_INT_DIGITS)) {
19861
        err = MP_VAL;
19862
    }
19863
    /* Check that r is large enough to hold maximum sized result. */
19864
    else if (((a->used <= b->used) && (r->size < a->used)) ||
19865
             ((b->used < a->used) && (r->size < b->used))) {
19866
        err = MP_VAL;
19867
    }
19868
#ifdef WOLFSSL_SP_INT_NEGATIVE
19869
    /* Algorithm doesn't work with negative numbers. */
19870
    else if ((a->sign == MP_NEG) || (b->sign == MP_NEG)) {
19871
        err = MP_VAL;
19872
    }
19873
#endif
19874
    else if (sp_iszero(a)) {
19875
        /* GCD of 0 and 0 is undefined - all integers divide 0. */
19876
        if (sp_iszero(b)) {
19877
            err = MP_VAL;
19878
        }
19879
        else {
19880
            /* GCD of 0 and b is b - b divides 0. */
19881
            err = sp_copy(b, r);
19882
        }
19883
    }
19884
    else if (sp_iszero(b)) {
19885
        /* GCD of 0 and a is a - a divides 0. */
19886
        err = sp_copy(a, r);
19887
    }
19888
    else {
19889
        /* Calculate GCD. */
19890
        err = _sp_gcd(a, b, r);
19891
    }
19892
19893
    return err;
19894
}
19895
19896
#endif /* !NO_RSA && WOLFSSL_KEY_GEN */
19897
19898
#if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN) && \
19899
    (!defined(WC_RSA_BLINDING) || defined(HAVE_FIPS) || defined(HAVE_SELFTEST))
19900
19901
/* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
19902
 * Smallest number divisible by both numbers.
19903
 *
19904
 * a and b are positive integers.
19905
 *
19906
 * lcm(a, b) = (a / gcd(a, b)) * b
19907
 * Divide the larger value by the common divisor and multiply by the other.
19908
 *
19909
 * Algorithm:
19910
 *  1. t0 = gcd(a, b)
19911
 *  2. If a > b then
19912
 *   2.1. t1 = a / t0
19913
 *   2.2. r = b * t1
19914
 *  3. Else
19915
 *   3.1. t1 = b / t0
19916
 *   3.2. r = a * t1
19917
 *
19918
 * @param [in]  a  SP integer of first operand.
19919
 * @param [in]  b  SP integer of second operand.
19920
 * @param [out] r  SP integer to hold result.
19921
 *
19922
 * @return  MP_OKAY on success.
19923
 * @return  MP_MEM when dynamic memory allocation fails.
19924
 */
19925
static int _sp_lcm(const sp_int* a, const sp_int* b, sp_int* r)
19926
{
19927
    int err = MP_OKAY;
19928
    /* Determine maximum digit length numbers will reach. */
19929
    unsigned int used = ((a->used >= b->used) ? a->used + 1: b->used + 1);
19930
    DECL_SP_INT_ARRAY(t, used, 2);
19931
19932
    ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
19933
    if (err == MP_OKAY) {
19934
        _sp_init_size(t[0], used);
19935
        _sp_init_size(t[1], used);
19936
19937
        SAVE_VECTOR_REGISTERS(err = _svr_ret;);
19938
19939
        if (err == MP_OKAY) {
19940
            /* 1. t0 = gcd(a, b) */
19941
            err = sp_gcd(a, b, t[0]);
19942
        }
19943
19944
        if (err == MP_OKAY) {
19945
            /* Divide the greater by the common divisor and multiply by other
19946
             * to operate on the smallest length numbers.
19947
             */
19948
            /* 2. If a > b then */
19949
            if (_sp_cmp_abs(a, b) == MP_GT) {
19950
                /* 2.1. t1 = a / t0 */
19951
                err = sp_div(a, t[0], t[1], NULL);
19952
                if (err == MP_OKAY) {
19953
                    /* 2.2. r = b * t1 */
19954
                    err = sp_mul(b, t[1], r);
19955
                }
19956
            }
19957
            /* 3. Else */
19958
            else {
19959
                /* 3.1. t1 = b / t0 */
19960
                err = sp_div(b, t[0], t[1], NULL);
19961
                if (err == MP_OKAY) {
19962
                    /* 3.2. r = a * t1 */
19963
                    err = sp_mul(a, t[1], r);
19964
                }
19965
            }
19966
        }
19967
19968
        RESTORE_VECTOR_REGISTERS();
19969
    }
19970
19971
    FREE_SP_INT_ARRAY(t, NULL);
19972
    return err;
19973
}
19974
19975
/* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
19976
 * Smallest number divisible by both numbers.
19977
 *
19978
 * a and b are positive integers.
19979
 *
19980
 * @param [in]  a  SP integer of first operand.
19981
 * @param [in]  b  SP integer of second operand.
19982
 * @param [out] r  SP integer to hold result.
19983
 *
19984
 * @return  MP_OKAY on success.
19985
 * @return  MP_VAL when a, b or r is NULL; or a or b is zero.
19986
 * @return  MP_MEM when dynamic memory allocation fails.
19987
 */
19988
int sp_lcm(const sp_int* a, const sp_int* b, sp_int* r)
19989
{
19990
    int err = MP_OKAY;
19991
19992
    /* Validate parameters. */
19993
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
19994
        err = MP_VAL;
19995
    }
19996
#ifdef WOLFSSL_SP_INT_NEGATIVE
19997
    /* Ensure a and b are positive. */
19998
    else if ((a->sign == MP_NEG) || (b->sign == MP_NEG)) {
19999
        err = MP_VAL;
20000
    }
20001
#endif
20002
    /* Ensure r has space for maximal result. */
20003
    else if (r->size < a->used + b->used) {
20004
        err = MP_VAL;
20005
    }
20006
20007
    /* LCM of 0 and any number is undefined as 0 is not in the set of values
20008
     * being used.
20009
     */
20010
    if ((err == MP_OKAY) && (mp_iszero(a) || mp_iszero(b))) {
20011
        err = MP_VAL;
20012
    }
20013
20014
    if (err == MP_OKAY) {
20015
        /* Do operation. */
20016
        err = _sp_lcm(a, b, r);
20017
    }
20018
20019
    return err;
20020
}
20021
20022
#endif /* !NO_RSA && WOLFSSL_KEY_GEN && (!WC_RSA_BLINDING || HAVE_FIPS ||
20023
        * HAVE_SELFTEST) */
20024
20025
/* Returns the run time settings.
20026
 *
20027
 * @return  Settings value.
20028
 */
20029
word32 CheckRunTimeSettings(void)
20030
0
{
20031
0
    return CTC_SETTINGS;
20032
0
}
20033
20034
/* Returns the fast math settings.
20035
 *
20036
 * @return  Setting - number of bits in a digit.
20037
 */
20038
word32 CheckRunTimeFastMath(void)
20039
0
{
20040
0
    return SP_WORD_SIZE;
20041
0
}
20042
20043
#ifdef WOLFSSL_CHECK_MEM_ZERO
20044
/* Add an SP integer to the memory check list.
20045
 *
20046
 * @param [in] name  Name of address to check.
20047
 * @param [in] sp    sp_int that needs to be checked.
20048
 */
20049
void sp_memzero_add(const char* name, sp_int* sp)
20050
{
20051
    wc_MemZero_Add(name, sp->dp, sp->size * sizeof(sp_int_digit));
20052
}
20053
20054
/* Check the memory in the data pointer for memory that must be zero.
20055
 *
20056
 * @param [in] sp  sp_int that needs to be checked.
20057
 */
20058
void sp_memzero_check(sp_int* sp)
20059
{
20060
    wc_MemZero_Check(sp->dp, sp->size * sizeof(sp_int_digit));
20061
}
20062
#endif /* WOLFSSL_CHECK_MEM_ZERO */
20063
20064
#ifdef WOLFSSL_SP_DYN_STACK
20065
    PRAGMA_GCC_DIAG_POP
20066
#endif
20067
20068
#endif /* WOLFSSL_SP_MATH || WOLFSSL_SP_MATH_ALL */