Coverage Report

Created: 2025-11-16 07:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/wolfssl-normal-math/wolfcrypt/src/fe_448.c
Line
Count
Source
1
/* fe_448.c
2
 *
3
 * Copyright (C) 2006-2025 wolfSSL Inc.
4
 *
5
 * This file is part of wolfSSL.
6
 *
7
 * wolfSSL is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 3 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * wolfSSL is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20
 */
21
22
/* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work.
23
 * Small implementation based on Daniel Beer's curve25519 public domain work.
24
 * Reworked for curve448 by Sean Parkinson.
25
 */
26
27
#include <wolfssl/wolfcrypt/libwolfssl_sources.h>
28
29
#if defined(HAVE_CURVE448) || defined(HAVE_ED448)
30
31
#include <wolfssl/wolfcrypt/fe_448.h>
32
33
#ifdef NO_INLINE
34
    #include <wolfssl/wolfcrypt/misc.h>
35
#else
36
    #define WOLFSSL_MISC_INCLUDED
37
    #include <wolfcrypt/src/misc.c>
38
#endif
39
40
#if defined(CURVE448_SMALL) || defined(ED448_SMALL)
41
42
/* Initialize the field element operations.
43
 */
44
void fe448_init(void)
45
{
46
}
47
48
/* Normalize the field element.
49
 * Ensure result is in range: 0..2^448-2^224-2
50
 *
51
 * a  [in]  Field element in range 0..2^448-1.
52
 */
53
void fe448_norm(word8* a)
54
{
55
    int i;
56
    sword16 c = 0;
57
    sword16 o = 0;
58
59
    for (i = 0; i < 56; i++) {
60
        c += a[i];
61
        if ((i == 0) || (i == 28))
62
            c += 1;
63
        c >>= 8;
64
    }
65
66
    for (i = 0; i < 56; i++) {
67
        if ((i == 0) || (i == 28)) o += c;
68
        o += a[i];
69
        a[i] = (word8)o;
70
        o >>= 8;
71
    }
72
}
73
74
/* Copy one field element into another: d = a.
75
 *
76
 * d  [in]  Destination field element.
77
 * a  [in]  Source field element.
78
 */
79
void fe448_copy(word8* d, const word8* a)
80
{
81
    int i;
82
    for (i = 0; i < 56; i++) {
83
         d[i] = a[i];
84
    }
85
}
86
87
/* Conditionally swap the elements.
88
 * Constant time implementation.
89
 *
90
 * a  [in]  First field element.
91
 * b  [in]  Second field element.
92
 * c  [in]  Swap when 1. Valid values: 0, 1.
93
 */
94
static void fe448_cswap(word8* a, word8* b, int c)
95
{
96
    int i;
97
    word8 mask = -(word8)c;
98
    word8 t[56];
99
100
    for (i = 0; i < 56; i++)
101
        t[i] = (a[i] ^ b[i]) & mask;
102
    for (i = 0; i < 56; i++)
103
        a[i] ^= t[i];
104
    for (i = 0; i < 56; i++)
105
        b[i] ^= t[i];
106
}
107
108
/* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
109
 *
110
 * r  [in]  Field element to hold sum.
111
 * a  [in]  Field element to add.
112
 * b  [in]  Field element to add.
113
 */
114
void fe448_add(word8* r, const word8* a, const word8* b)
115
{
116
    int i;
117
    sword16 c = 0;
118
    sword16 o = 0;
119
120
    for (i = 0; i < 56; i++) {
121
        c += a[i];
122
        c += b[i];
123
        r[i] = (word8)c;
124
        c >>= 8;
125
    }
126
127
    for (i = 0; i < 56; i++) {
128
        if ((i == 0) || (i == 28)) o += c;
129
        o += r[i];
130
        r[i] = (word8)o;
131
        o >>= 8;
132
    }
133
}
134
135
/* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
136
 *
137
 * r  [in]  Field element to hold difference.
138
 * a  [in]  Field element to subtract from.
139
 * b  [in]  Field element to subtract.
140
 */
141
void fe448_sub(word8* r, const word8* a, const word8* b)
142
{
143
    int i;
144
    sword16 c = 0;
145
    sword16 o = 0;
146
147
    for (i = 0; i < 56; i++) {
148
        if (i == 28)
149
            c += 0x1fc;
150
        else
151
            c += 0x1fe;
152
        c += a[i];
153
        c -= b[i];
154
        r[i] = (word8)c;
155
        c >>= 8;
156
    }
157
158
    for (i = 0; i < 56; i++) {
159
        if ((i == 0) || (i == 28)) o += c;
160
        o += r[i];
161
        r[i] = (word8)o;
162
        o >>= 8;
163
    }
164
}
165
166
/* Multiply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
167
 *
168
 * r  [in]  Field element to hold result.
169
 * a  [in]  Field element to multiply.
170
 */
171
void fe448_mul39081(word8* r, const word8* a)
172
{
173
    int i;
174
    sword32 c = 0;
175
    sword32 o = 0;
176
177
    for (i = 0; i < 56; i++) {
178
        c += a[i] * (sword32)39081;
179
        r[i] = (word8)c;
180
        c >>= 8;
181
    }
182
183
    for (i = 0; i < 56; i++) {
184
        if ((i == 0) || (i == 28)) o += c;
185
        o += r[i];
186
        r[i] = (word8)o;
187
        o >>= 8;
188
    }
189
}
190
191
/* Multiply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
192
 *
193
 * r  [in]  Field element to hold result.
194
 * a  [in]  Field element to multiply.
195
 * b  [in]  Field element to multiply.
196
 */
197
void fe448_mul(word8* r, const word8* a, const word8* b)
198
{
199
    int i, k;
200
    sword32 c = 0;
201
    sword16 o = 0, cc = 0;
202
    word8 t[112];
203
204
    for (k = 0; k < 56; k++) {
205
        i = 0;
206
        for (; i <= k; i++) {
207
            c += (sword32)a[i] * b[k - i];
208
        }
209
        t[k] = (word8)c;
210
        c >>= 8;
211
    }
212
    for (; k < 111; k++) {
213
        i = k - 55;
214
        for (; i < 56; i++) {
215
            c += (sword32)a[i] * b[k - i];
216
        }
217
        t[k] = (word8)c;
218
        c >>= 8;
219
    }
220
    t[k] = (word8)c;
221
222
    for (i = 0; i < 28; i++) {
223
        o += t[i];
224
        o += t[i + 56];
225
        o += t[i + 84];
226
        r[i] = (word8)o;
227
        o >>= 8;
228
    }
229
    for (i = 28; i < 56; i++) {
230
        o += t[i];
231
        o += t[i + 56];
232
        o += t[i + 28];
233
        o += t[i + 56];
234
        r[i] = (word8)o;
235
        o >>= 8;
236
    }
237
    for (i = 0; i < 56; i++) {
238
        if ((i == 0) || (i == 28)) cc += o;
239
        cc += r[i];
240
        r[i] = (word8)cc;
241
        cc >>= 8;
242
    }
243
}
244
245
/* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
246
 *
247
 * r  [in]  Field element to hold result.
248
 * a  [in]  Field element to square.
249
 */
250
void fe448_sqr(word8* r, const word8* a)
251
{
252
    int i, k;
253
    sword32 c = 0;
254
    sword32 p;
255
    sword16 o = 0, cc = 0;
256
    word8 t[112];
257
258
    for (k = 0; k < 56; k++) {
259
        i = 0;
260
        for (; i <= k; i++) {
261
            if (k - i < i)
262
                break;
263
            p = (sword32)a[i] * a[k - i];
264
            if (k - i != i)
265
                p *= 2;
266
            c += p;
267
        }
268
        t[k] = (word8)c;
269
        c >>= 8;
270
    }
271
    for (; k < 111; k++) {
272
         i = k - 55;
273
        for (; i < 56; i++) {
274
            if (k - i < i)
275
                break;
276
            p = (sword32)a[i] * a[k - i];
277
            if (k - i != i)
278
                p *= 2;
279
            c += p;
280
        }
281
        t[k] = (word8)c;
282
        c >>= 8;
283
    }
284
    t[k] = (word8)c;
285
286
    for (i = 0; i < 28; i++) {
287
        o += t[i];
288
        o += t[i + 56];
289
        o += t[i + 84];
290
        r[i] = (word8)o;
291
        o >>= 8;
292
    }
293
    for (i = 28; i < 56; i++) {
294
        o += t[i];
295
        o += t[i + 56];
296
        o += t[i + 28];
297
        o += t[i + 56];
298
        r[i] = (word8)o;
299
        o >>= 8;
300
    }
301
    for (i = 0; i < 56; i++) {
302
        if ((i == 0) || (i == 28)) cc += o;
303
        cc += r[i];
304
        r[i] = (word8)cc;
305
        cc >>= 8;
306
    }
307
    fe448_norm(r);
308
}
309
310
/* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
311
 * Constant time implementation - using Fermat's little theorem:
312
 *   a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
313
 * For curve448: p - 2 = 2^448 - 2^224 - 3
314
 *
315
 * r  [in]  Field element to hold result.
316
 * a  [in]  Field element to invert.
317
 */
318
void fe448_invert(word8* r, const word8* a)
319
{
320
    int i;
321
    word8 t[56];
322
323
    fe448_sqr(t, a);
324
    fe448_mul(t, t, a);
325
    for (i = 0; i < 221; i++) {
326
        fe448_sqr(t, t);
327
        fe448_mul(t, t, a);
328
    }
329
    fe448_sqr(t, t);
330
    for (i = 0; i < 222; i++) {
331
        fe448_sqr(t, t);
332
        fe448_mul(t, t, a);
333
    }
334
    fe448_sqr(t, t);
335
    fe448_sqr(t, t);
336
    fe448_mul(r, t, a);
337
}
338
339
/* Scalar multiply the point by a number. r = n.a
340
 * Uses Montgomery ladder and only requires the x-ordinate.
341
 *
342
 * r  [in]  Field element to hold result.
343
 * n  [in]  Scalar as an array of bytes.
344
 * a  [in]  Point to multiply - x-ordinate only.
345
 */
346
int curve448(byte* r, const byte* n, const byte* a)
347
{
348
    word8 x1[56];
349
    word8 x2[56] = {1};
350
    word8 z2[56] = {0};
351
    word8 x3[56];
352
    word8 z3[56] = {1};
353
    word8 t0[56];
354
    word8 t1[56];
355
    int i;
356
    unsigned int swap;
357
358
    fe448_copy(x1, a);
359
    fe448_copy(x3, a);
360
361
    swap = 0;
362
    for (i = 447; i >= 0; --i) {
363
        unsigned int b = (n[i >> 3] >> (i & 7)) & 1;
364
        swap ^= b;
365
        fe448_cswap(x2, x3, swap);
366
        fe448_cswap(z2, z3, swap);
367
        swap = b;
368
369
        /* Montgomery Ladder - double and add */
370
        fe448_add(t0, x2, z2);
371
        fe448_add(t1, x3, z3);
372
        fe448_sub(x2, x2, z2);
373
        fe448_sub(x3, x3, z3);
374
        fe448_mul(t1, t1, x2);
375
        fe448_mul(z3, x3, t0);
376
        fe448_sqr(t0, t0);
377
        fe448_sqr(x2, x2);
378
        fe448_add(x3, z3, t1);
379
        fe448_sqr(x3, x3);
380
        fe448_sub(z3, z3, t1);
381
        fe448_sqr(z3, z3);
382
        fe448_mul(z3, z3, x1);
383
        fe448_sub(t1, t0, x2);
384
        fe448_mul(x2, t0, x2);
385
        fe448_mul39081(z2, t1);
386
        fe448_add(z2, t0, z2);
387
        fe448_mul(z2, z2, t1);
388
    }
389
    fe448_cswap(x2, x3, swap);
390
    fe448_cswap(z2, z3, swap);
391
392
    fe448_invert(z2, z2);
393
    fe448_mul(r, x2, z2);
394
    fe448_norm(r);
395
396
    return 0;
397
}
398
399
#ifdef HAVE_ED448
400
/* Check whether field element is not 0.
401
 * Field element must have been normalized before call.
402
 *
403
 * a  [in]  Field element.
404
 * returns 0 when zero, and any other value otherwise.
405
 */
406
int fe448_isnonzero(const word8* a)
407
{
408
    int i;
409
    byte c = 0;
410
    for (i = 0; i < 56; i++)
411
        c |= a[i];
412
    return c;
413
}
414
415
/* Negates the field element. r = -a mod (2^448 - 2^224 - 1)
416
 * Add 0x200 to each element and subtract 2 from next.
417
 * Top element overflow handled by subtracting 2 from index 0 and 28.
418
 *
419
 * r  [in]  Field element to hold result.
420
 * a  [in]  Field element.
421
 */
422
void fe448_neg(word8* r, const word8* a)
423
{
424
    int i;
425
    sword16 c = 0;
426
    sword16 o = 0;
427
428
    for (i = 0; i < 56; i++) {
429
        if (i == 28)
430
            c += 0x1fc;
431
        else
432
            c += 0x1fe;
433
        c -= a[i];
434
        r[i] = (word8)c;
435
        c >>= 8;
436
    }
437
438
    for (i = 0; i < 56; i++) {
439
        if ((i == 0) || (i == 28)) o += c;
440
        o += r[i];
441
        r[i] = (word8)o;
442
        o >>= 8;
443
    }
444
}
445
446
/* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
447
 * Used for calculating y-ordinate from x-ordinate for Ed448.
448
 *
449
 * r  [in]  Field element to hold result.
450
 * a  [in]  Field element to exponentiate.
451
 */
452
void fe448_pow_2_446_222_1(word8* r, const word8* a)
453
{
454
    int i;
455
    word8 t[56];
456
457
    fe448_sqr(t, a);
458
    fe448_mul(t, t, a);
459
    for (i = 0; i < 221; i++) {
460
        fe448_sqr(t, t);
461
        fe448_mul(t, t, a);
462
    }
463
    fe448_sqr(t, t);
464
    for (i = 0; i < 221; i++) {
465
        fe448_sqr(t, t);
466
        fe448_mul(t, t, a);
467
    }
468
    fe448_sqr(t, t);
469
    fe448_mul(r, t, a);
470
}
471
472
/* Constant time, conditional move of b into a.
473
 * a is not changed if the condition is 0.
474
 *
475
 * a  A field element.
476
 * b  A field element.
477
 * c  If 1 then copy and if 0 then don't copy.
478
 */
479
void fe448_cmov(word8* a, const word8* b, int c)
480
{
481
    int i;
482
    word8 m = -(word8)c;
483
    word8 t[56];
484
485
    for (i = 0; i < 56; i++)
486
        t[i] = m & (a[i] ^ b[i]);
487
    for (i = 0; i < 56; i++)
488
        a[i] ^= t[i];
489
}
490
491
#endif /* HAVE_ED448 */
492
#elif defined(CURVED448_128BIT)
493
494
/* Initialize the field element operations.
495
 */
496
void fe448_init(void)
497
7.41k
{
498
7.41k
}
499
500
/* Convert the field element from a byte array to an array of 56-bits.
501
 *
502
 * r  [in]  Array to encode into.
503
 * b  [in]  Byte array.
504
 */
505
void fe448_from_bytes(sword64* r, const unsigned char* b)
506
6.32k
{
507
6.32k
    r[ 0] = (sword64)(
508
6.32k
             (((word64)b[ 0]) <<  0)
509
6.32k
          |  (((word64)b[ 1]) <<  8)
510
6.32k
          |  (((word64)b[ 2]) << 16)
511
6.32k
          |  (((word64)b[ 3]) << 24)
512
6.32k
          |  (((word64)b[ 4]) << 32)
513
6.32k
          |  (((word64)b[ 5]) << 40)
514
6.32k
          |  (((word64)b[ 6]) << 48));
515
6.32k
    r[ 1] = (sword64)(
516
6.32k
             (((word64)b[ 7]) <<  0)
517
6.32k
          |  (((word64)b[ 8]) <<  8)
518
6.32k
          |  (((word64)b[ 9]) << 16)
519
6.32k
          |  (((word64)b[10]) << 24)
520
6.32k
          |  (((word64)b[11]) << 32)
521
6.32k
          |  (((word64)b[12]) << 40)
522
6.32k
          |  (((word64)b[13]) << 48));
523
6.32k
    r[ 2] = (sword64)(
524
6.32k
             (((word64)b[14]) <<  0)
525
6.32k
          |  (((word64)b[15]) <<  8)
526
6.32k
          |  (((word64)b[16]) << 16)
527
6.32k
          |  (((word64)b[17]) << 24)
528
6.32k
          |  (((word64)b[18]) << 32)
529
6.32k
          |  (((word64)b[19]) << 40)
530
6.32k
          |  (((word64)b[20]) << 48));
531
6.32k
    r[ 3] = (sword64)(
532
6.32k
             (((word64)b[21]) <<  0)
533
6.32k
          |  (((word64)b[22]) <<  8)
534
6.32k
          |  (((word64)b[23]) << 16)
535
6.32k
          |  (((word64)b[24]) << 24)
536
6.32k
          |  (((word64)b[25]) << 32)
537
6.32k
          |  (((word64)b[26]) << 40)
538
6.32k
          |  (((word64)b[27]) << 48));
539
6.32k
    r[ 4] = (sword64)(
540
6.32k
             (((word64)b[28]) <<  0)
541
6.32k
          |  (((word64)b[29]) <<  8)
542
6.32k
          |  (((word64)b[30]) << 16)
543
6.32k
          |  (((word64)b[31]) << 24)
544
6.32k
          |  (((word64)b[32]) << 32)
545
6.32k
          |  (((word64)b[33]) << 40)
546
6.32k
          |  (((word64)b[34]) << 48));
547
6.32k
    r[ 5] = (sword64)(
548
6.32k
             (((word64)b[35]) <<  0)
549
6.32k
          |  (((word64)b[36]) <<  8)
550
6.32k
          |  (((word64)b[37]) << 16)
551
6.32k
          |  (((word64)b[38]) << 24)
552
6.32k
          |  (((word64)b[39]) << 32)
553
6.32k
          |  (((word64)b[40]) << 40)
554
6.32k
          |  (((word64)b[41]) << 48));
555
6.32k
    r[ 6] = (sword64)(
556
6.32k
             (((word64)b[42]) <<  0)
557
6.32k
          |  (((word64)b[43]) <<  8)
558
6.32k
          |  (((word64)b[44]) << 16)
559
6.32k
          |  (((word64)b[45]) << 24)
560
6.32k
          |  (((word64)b[46]) << 32)
561
6.32k
          |  (((word64)b[47]) << 40)
562
6.32k
          |  (((word64)b[48]) << 48));
563
6.32k
    r[ 7] = (sword64)(
564
6.32k
             (((word64)b[49]) <<  0)
565
6.32k
          |  (((word64)b[50]) <<  8)
566
6.32k
          |  (((word64)b[51]) << 16)
567
6.32k
          |  (((word64)b[52]) << 24)
568
6.32k
          |  (((word64)b[53]) << 32)
569
6.32k
          |  (((word64)b[54]) << 40)
570
6.32k
          |  (((word64)b[55]) << 48));
571
6.32k
}
572
573
/* Convert the field element to a byte array from an array of 56-bits.
574
 *
575
 * b  [in]  Byte array.
576
 * a  [in]  Array to encode into.
577
 */
578
void fe448_to_bytes(unsigned char* b, const sword64* a)
579
24.5k
{
580
24.5k
    sword64 t;
581
    /* Mod */
582
24.5k
    sword64 in0 = a[0];
583
24.5k
    sword64 in1 = a[1];
584
24.5k
    sword64 in2 = a[2];
585
24.5k
    sword64 in3 = a[3];
586
24.5k
    sword64 in4 = a[4];
587
24.5k
    sword64 in5 = a[5];
588
24.5k
    sword64 in6 = a[6];
589
24.5k
    sword64 in7 = a[7];
590
24.5k
    sword64 o = in7 >> 56;
591
24.5k
    in7 -= (sword64)((word64)o << 56);
592
24.5k
    in0 += o;
593
24.5k
    in4 += o;
594
24.5k
    o = (in0 + 1) >> 56;
595
24.5k
    o = (o + in1) >> 56;
596
24.5k
    o = (o + in2) >> 56;
597
24.5k
    o = (o + in3) >> 56;
598
24.5k
    o = (o + in4 + 1) >> 56;
599
24.5k
    o = (o + in5) >> 56;
600
24.5k
    o = (o + in6) >> 56;
601
24.5k
    o = (o + in7) >> 56;
602
24.5k
    in0 += o;
603
24.5k
    in4 += o;
604
24.5k
    in7 -= (sword64)((word64)o << 56);
605
24.5k
    o = (sword64)(in0  >> 56); in1  += o;
606
24.5k
    t = (sword64)((word64)o << 56); in0  -= t;
607
24.5k
    o = (sword64)(in1  >> 56); in2  += o;
608
24.5k
    t = (sword64)((word64)o << 56); in1  -= t;
609
24.5k
    o = (sword64)(in2  >> 56); in3  += o;
610
24.5k
    t = (sword64)((word64)o << 56); in2  -= t;
611
24.5k
    o = (sword64)(in3  >> 56); in4  += o;
612
24.5k
    t = (sword64)((word64)o << 56); in3  -= t;
613
24.5k
    o = (sword64)(in4  >> 56); in5  += o;
614
24.5k
    t = (sword64)((word64)o << 56); in4  -= t;
615
24.5k
    o = (sword64)(in5  >> 56); in6  += o;
616
24.5k
    t = (sword64)((word64)o << 56); in5  -= t;
617
24.5k
    o = (sword64)(in6  >> 56); in7  += o;
618
24.5k
    t = (sword64)((word64)o << 56); in6  -= t;
619
24.5k
    o = (sword64)(in7  >> 56); in0  += o;
620
24.5k
                               in4  += o;
621
24.5k
    t = (sword64)((word64)o << 56); in7  -= t;
622
623
    /* Output as bytes */
624
24.5k
    b[ 0] = (byte)(in0  >>  0);
625
24.5k
    b[ 1] = (byte)(in0  >>  8);
626
24.5k
    b[ 2] = (byte)(in0  >> 16);
627
24.5k
    b[ 3] = (byte)(in0  >> 24);
628
24.5k
    b[ 4] = (byte)(in0  >> 32);
629
24.5k
    b[ 5] = (byte)(in0  >> 40);
630
24.5k
    b[ 6] = (byte)(in0  >> 48);
631
24.5k
    b[ 7] = (byte)(in1  >>  0);
632
24.5k
    b[ 8] = (byte)(in1  >>  8);
633
24.5k
    b[ 9] = (byte)(in1  >> 16);
634
24.5k
    b[10] = (byte)(in1  >> 24);
635
24.5k
    b[11] = (byte)(in1  >> 32);
636
24.5k
    b[12] = (byte)(in1  >> 40);
637
24.5k
    b[13] = (byte)(in1  >> 48);
638
24.5k
    b[14] = (byte)(in2  >>  0);
639
24.5k
    b[15] = (byte)(in2  >>  8);
640
24.5k
    b[16] = (byte)(in2  >> 16);
641
24.5k
    b[17] = (byte)(in2  >> 24);
642
24.5k
    b[18] = (byte)(in2  >> 32);
643
24.5k
    b[19] = (byte)(in2  >> 40);
644
24.5k
    b[20] = (byte)(in2  >> 48);
645
24.5k
    b[21] = (byte)(in3  >>  0);
646
24.5k
    b[22] = (byte)(in3  >>  8);
647
24.5k
    b[23] = (byte)(in3  >> 16);
648
24.5k
    b[24] = (byte)(in3  >> 24);
649
24.5k
    b[25] = (byte)(in3  >> 32);
650
24.5k
    b[26] = (byte)(in3  >> 40);
651
24.5k
    b[27] = (byte)(in3  >> 48);
652
24.5k
    b[28] = (byte)(in4  >>  0);
653
24.5k
    b[29] = (byte)(in4  >>  8);
654
24.5k
    b[30] = (byte)(in4  >> 16);
655
24.5k
    b[31] = (byte)(in4  >> 24);
656
24.5k
    b[32] = (byte)(in4  >> 32);
657
24.5k
    b[33] = (byte)(in4  >> 40);
658
24.5k
    b[34] = (byte)(in4  >> 48);
659
24.5k
    b[35] = (byte)(in5  >>  0);
660
24.5k
    b[36] = (byte)(in5  >>  8);
661
24.5k
    b[37] = (byte)(in5  >> 16);
662
24.5k
    b[38] = (byte)(in5  >> 24);
663
24.5k
    b[39] = (byte)(in5  >> 32);
664
24.5k
    b[40] = (byte)(in5  >> 40);
665
24.5k
    b[41] = (byte)(in5  >> 48);
666
24.5k
    b[42] = (byte)(in6  >>  0);
667
24.5k
    b[43] = (byte)(in6  >>  8);
668
24.5k
    b[44] = (byte)(in6  >> 16);
669
24.5k
    b[45] = (byte)(in6  >> 24);
670
24.5k
    b[46] = (byte)(in6  >> 32);
671
24.5k
    b[47] = (byte)(in6  >> 40);
672
24.5k
    b[48] = (byte)(in6  >> 48);
673
24.5k
    b[49] = (byte)(in7  >>  0);
674
24.5k
    b[50] = (byte)(in7  >>  8);
675
24.5k
    b[51] = (byte)(in7  >> 16);
676
24.5k
    b[52] = (byte)(in7  >> 24);
677
24.5k
    b[53] = (byte)(in7  >> 32);
678
24.5k
    b[54] = (byte)(in7  >> 40);
679
24.5k
    b[55] = (byte)(in7  >> 48);
680
24.5k
}
681
682
/* Set the field element to 0.
683
 *
684
 * a  [in]  Field element.
685
 */
686
void fe448_1(sword64* a)
687
583k
{
688
583k
    a[0] = 1;
689
583k
    a[1] = 0;
690
583k
    a[2] = 0;
691
583k
    a[3] = 0;
692
583k
    a[4] = 0;
693
583k
    a[5] = 0;
694
583k
    a[6] = 0;
695
583k
    a[7] = 0;
696
583k
}
697
698
/* Set the field element to 0.
699
 *
700
 * a  [in]  Field element.
701
 */
702
void fe448_0(sword64* a)
703
570k
{
704
570k
    a[0] = 0;
705
570k
    a[1] = 0;
706
570k
    a[2] = 0;
707
570k
    a[3] = 0;
708
570k
    a[4] = 0;
709
570k
    a[5] = 0;
710
570k
    a[6] = 0;
711
570k
    a[7] = 0;
712
570k
}
713
714
/* Copy one field element into another: d = a.
715
 *
716
 * d  [in]  Destination field element.
717
 * a  [in]  Source field element.
718
 */
719
void fe448_copy(sword64* d, const sword64* a)
720
16.8k
{
721
16.8k
    d[0] = a[0];
722
16.8k
    d[1] = a[1];
723
16.8k
    d[2] = a[2];
724
16.8k
    d[3] = a[3];
725
16.8k
    d[4] = a[4];
726
16.8k
    d[5] = a[5];
727
16.8k
    d[6] = a[6];
728
16.8k
    d[7] = a[7];
729
16.8k
}
730
731
/* Conditionally swap the elements.
732
 * Constant time implementation.
733
 *
734
 * a  [in]  First field element.
735
 * b  [in]  Second field element.
736
 * c  [in]  Swap when 1. Valid values: 0, 1.
737
 */
738
static void fe448_cswap(sword64* a, sword64* b, int c)
739
1.43M
{
740
1.43M
    sword64 mask = -(sword64)c;
741
1.43M
    sword64 t0 = (a[0] ^ b[0]) & mask;
742
1.43M
    sword64 t1 = (a[1] ^ b[1]) & mask;
743
1.43M
    sword64 t2 = (a[2] ^ b[2]) & mask;
744
1.43M
    sword64 t3 = (a[3] ^ b[3]) & mask;
745
1.43M
    sword64 t4 = (a[4] ^ b[4]) & mask;
746
1.43M
    sword64 t5 = (a[5] ^ b[5]) & mask;
747
1.43M
    sword64 t6 = (a[6] ^ b[6]) & mask;
748
1.43M
    sword64 t7 = (a[7] ^ b[7]) & mask;
749
1.43M
    a[0] ^= t0;
750
1.43M
    a[1] ^= t1;
751
1.43M
    a[2] ^= t2;
752
1.43M
    a[3] ^= t3;
753
1.43M
    a[4] ^= t4;
754
1.43M
    a[5] ^= t5;
755
1.43M
    a[6] ^= t6;
756
1.43M
    a[7] ^= t7;
757
1.43M
    b[0] ^= t0;
758
1.43M
    b[1] ^= t1;
759
1.43M
    b[2] ^= t2;
760
1.43M
    b[3] ^= t3;
761
1.43M
    b[4] ^= t4;
762
1.43M
    b[5] ^= t5;
763
1.43M
    b[6] ^= t6;
764
1.43M
    b[7] ^= t7;
765
1.43M
}
766
767
/* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
768
 *
769
 * r  [in]  Field element to hold sum.
770
 * a  [in]  Field element to add.
771
 * b  [in]  Field element to add.
772
 */
773
void fe448_add(sword64* r, const sword64* a, const sword64* b)
774
7.74M
{
775
7.74M
    r[0] = a[0] + b[0];
776
7.74M
    r[1] = a[1] + b[1];
777
7.74M
    r[2] = a[2] + b[2];
778
7.74M
    r[3] = a[3] + b[3];
779
7.74M
    r[4] = a[4] + b[4];
780
7.74M
    r[5] = a[5] + b[5];
781
7.74M
    r[6] = a[6] + b[6];
782
7.74M
    r[7] = a[7] + b[7];
783
7.74M
}
784
785
/* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
786
 *
787
 * r  [in]  Field element to hold difference.
788
 * a  [in]  Field element to subtract from.
789
 * b  [in]  Field element to subtract.
790
 */
791
void fe448_sub(sword64* r, const sword64* a, const sword64* b)
792
8.33M
{
793
8.33M
    r[0] = a[0] - b[0];
794
8.33M
    r[1] = a[1] - b[1];
795
8.33M
    r[2] = a[2] - b[2];
796
8.33M
    r[3] = a[3] - b[3];
797
8.33M
    r[4] = a[4] - b[4];
798
8.33M
    r[5] = a[5] - b[5];
799
8.33M
    r[6] = a[6] - b[6];
800
8.33M
    r[7] = a[7] - b[7];
801
8.33M
}
802
803
/* Multiply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
804
 *
805
 * r  [in]  Field element to hold result.
806
 * a  [in]  Field element to multiply.
807
 */
808
void fe448_mul39081(sword64* r, const sword64* a)
809
1.51M
{
810
1.51M
    sword128 t;
811
1.51M
    sword64 o;
812
1.51M
    sword128 t0 = a[0] * (sword128)39081;
813
1.51M
    sword128 t1 = a[1] * (sword128)39081;
814
1.51M
    sword128 t2 = a[2] * (sword128)39081;
815
1.51M
    sword128 t3 = a[3] * (sword128)39081;
816
1.51M
    sword128 t4 = a[4] * (sword128)39081;
817
1.51M
    sword128 t5 = a[5] * (sword128)39081;
818
1.51M
    sword128 t6 = a[6] * (sword128)39081;
819
1.51M
    sword128 t7 = a[7] * (sword128)39081;
820
1.51M
    o = (sword64)(t0  >> 56); t1  += o;
821
1.51M
    t = (sword128)((word128)o << 56); t0  -= t;
822
1.51M
    o = (sword64)(t1  >> 56); t2  += o;
823
1.51M
    t = (sword128)((word128)o << 56); t1  -= t;
824
1.51M
    o = (sword64)(t2  >> 56); t3  += o;
825
1.51M
    t = (sword128)((word128)o << 56); t2  -= t;
826
1.51M
    o = (sword64)(t3  >> 56); t4  += o;
827
1.51M
    t = (sword128)((word128)o << 56); t3  -= t;
828
1.51M
    o = (sword64)(t4  >> 56); t5  += o;
829
1.51M
    t = (sword128)((word128)o << 56); t4  -= t;
830
1.51M
    o = (sword64)(t5  >> 56); t6  += o;
831
1.51M
    t = (sword128)((word128)o << 56); t5  -= t;
832
1.51M
    o = (sword64)(t6  >> 56); t7  += o;
833
1.51M
    t = (sword128)((word128)o << 56); t6  -= t;
834
1.51M
    o = (sword64)(t7  >> 56); t0  += o;
835
1.51M
                              t4  += o;
836
1.51M
    t = (sword128)((word128)o << 56); t7  -= t;
837
838
    /* Store */
839
1.51M
    r[0] = (sword64)t0;
840
1.51M
    r[1] = (sword64)t1;
841
1.51M
    r[2] = (sword64)t2;
842
1.51M
    r[3] = (sword64)t3;
843
1.51M
    r[4] = (sword64)t4;
844
1.51M
    r[5] = (sword64)t5;
845
1.51M
    r[6] = (sword64)t6;
846
1.51M
    r[7] = (sword64)t7;
847
1.51M
}
848
849
/* Multiply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
850
 *
851
 * r  [in]  Field element to hold result.
852
 * a  [in]  Field element to multiply.
853
 * b  [in]  Field element to multiply.
854
 */
855
void fe448_mul(sword64* r, const sword64* a, const sword64* b)
856
13.4M
{
857
13.4M
    sword64 o;
858
13.4M
    sword64 a1[4];
859
13.4M
    sword64 b1[4];
860
13.4M
    sword128 t0;
861
13.4M
    sword128 t1;
862
13.4M
    sword128 t2;
863
13.4M
    sword128 t3;
864
13.4M
    sword128 t4;
865
13.4M
    sword128 t5;
866
13.4M
    sword128 t6;
867
13.4M
    sword128 t7;
868
13.4M
    sword128 t03;
869
13.4M
    sword128 t04;
870
13.4M
    sword128 t13;
871
13.4M
    sword128 t14;
872
873
13.4M
    a1[0] = a[0] + a[4];
874
13.4M
    a1[1] = a[1] + a[5];
875
13.4M
    a1[2] = a[2] + a[6];
876
13.4M
    a1[3] = a[3] + a[7];
877
13.4M
    b1[0] = b[0] + b[4];
878
13.4M
    b1[1] = b[1] + b[5];
879
13.4M
    b1[2] = b[2] + b[6];
880
13.4M
    b1[3] = b[3] + b[7];
881
882
13.4M
    t03 = ((sword128)a[0] * b[3]) + ((sword128)a[1] * b[2])
883
13.4M
       + ((sword128)a[2] * b[1]) + ((sword128)a[3] * b[0]);
884
13.4M
    t04 = ((sword128)a[1] * b[3]) + ((sword128)a[2] * b[2])
885
13.4M
       + ((sword128)a[3] * b[1]);
886
13.4M
    t04 += t03 >> 56;
887
13.4M
    t03 &= 0xffffffffffffffL;
888
13.4M
    t13 = ((sword128)a1[0] * b1[3]) + ((sword128)a1[1] * b1[2])
889
13.4M
       + ((sword128)a1[2] * b1[1]) + ((sword128)a1[3] * b1[0]);
890
13.4M
    t14 = ((sword128)a1[1] * b1[3]) + ((sword128)a1[2] * b1[2])
891
13.4M
       + ((sword128)a1[3] * b1[1]);
892
13.4M
    t14 += t13 >> 56;
893
13.4M
    t13 &= 0xffffffffffffffL;
894
895
13.4M
    t0 = ((sword128)a[0] * b[0]) + ((sword128)a[4] * b[4]) + t14 + -t04;
896
13.4M
    t1 = ((sword128)a[0] * b[1]) + ((sword128)a[1] * b[0])
897
13.4M
       + ((sword128)a[4] * b[5]) + ((sword128)a[5] * b[4])
898
13.4M
       + ((sword128)a1[2] * b1[3]) + ((sword128)a1[3] * b1[2])
899
13.4M
       - ((sword128)a[2] * b[3]) - ((sword128)a[3] * b[2]);
900
13.4M
    o = (sword64)(t0  >> 56); t1 += o; t0 &= 0xffffffffffffffL;
901
13.4M
    t2 = ((sword128)a[0] * b[2]) + ((sword128)a[1] * b[1])
902
13.4M
       + ((sword128)a[2] * b[0]) + ((sword128)a[4] * b[6])
903
13.4M
       + ((sword128)a[5] * b[5]) + ((sword128)a[6] * b[4])
904
13.4M
       + ((sword128)a1[3] * b1[3]) - ((sword128)a[3] * b[3]);
905
13.4M
    o = (sword64)(t1  >> 56); t2 += o; t1 &= 0xffffffffffffffL;
906
13.4M
    t3 = t03 + ((sword128)a[4] * b[7]) + ((sword128)a[5] * b[6])
907
13.4M
       + ((sword128)a[6] * b[5]) + ((sword128)a[7] * b[4]);
908
13.4M
    o = (sword64)(t2  >> 56); t3 += o; t2 &= 0xffffffffffffffL;
909
13.4M
    t4 = ((sword128)a[5] * b[7]) + ((sword128)a[6] * b[6])
910
13.4M
       + ((sword128)a[7] * b[5]) + ((sword128)a1[0] * b1[0])
911
13.4M
       - ((sword128)a[0] * b[0]) + t14;
912
13.4M
    o = (sword64)(t3  >> 56); t4 += o; t3 &= 0xffffffffffffffL;
913
13.4M
    t5 = ((sword128)a[6] * b[7]) + ((sword128)a[7] * b[6])
914
13.4M
       + ((sword128)a1[0] * b1[1]) + ((sword128)a1[1] * b1[0])
915
13.4M
       - ((sword128)a[0] * b[1]) - ((sword128)a[1] * b[0])
916
13.4M
       + ((sword128)a1[2] * b1[3]) + ((sword128)a1[3] * b1[2]);
917
13.4M
    o = (sword64)(t4  >> 56); t5 += o; t4 &= 0xffffffffffffffL;
918
13.4M
    t6 = ((sword128)a[7] * b[7]) + ((sword128)a1[0] * b1[2])
919
13.4M
       + ((sword128)a1[1] * b1[1]) + ((sword128)a1[2] * b1[0])
920
13.4M
       - ((sword128)a[0] * b[2]) - ((sword128)a[1] * b[1])
921
13.4M
       - ((sword128)a[2] * b[0]) + ((sword128)a1[3] * b1[3]);
922
13.4M
    o = (sword64)(t5  >> 56); t6 += o; t5 &= 0xffffffffffffffL;
923
13.4M
    t7 = t13 + -t03;
924
13.4M
    o = (sword64)(t6  >> 56); t7 += o; t6 &= 0xffffffffffffffL;
925
13.4M
    o = (sword64)(t7 >> 56); t0  += o;
926
13.4M
                   t4  += o; t7 &= 0xffffffffffffffL;
927
928
    /* Store */
929
13.4M
    r[0] = (sword64)t0;
930
13.4M
    r[1] = (sword64)t1;
931
13.4M
    r[2] = (sword64)t2;
932
13.4M
    r[3] = (sword64)t3;
933
13.4M
    r[4] = (sword64)t4;
934
13.4M
    r[5] = (sword64)t5;
935
13.4M
    r[6] = (sword64)t6;
936
13.4M
    r[7] = (sword64)t7;
937
13.4M
}
938
939
/* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
940
 *
941
 * r  [in]  Field element to hold result.
942
 * a  [in]  Field element to square.
943
 */
944
void fe448_sqr(sword64* r, const sword64* a)
945
12.8M
{
946
12.8M
    sword64 o;
947
12.8M
    sword64 a1[4];
948
12.8M
    sword128 t0;
949
12.8M
    sword128 t1;
950
12.8M
    sword128 t2;
951
12.8M
    sword128 t3;
952
12.8M
    sword128 t4;
953
12.8M
    sword128 t5;
954
12.8M
    sword128 t6;
955
12.8M
    sword128 t7;
956
12.8M
    sword128 t03;
957
12.8M
    sword128 t04;
958
12.8M
    sword128 t13;
959
12.8M
    sword128 t14;
960
961
12.8M
    a1[0] = a[0] + a[4];
962
12.8M
    a1[1] = a[1] + a[5];
963
12.8M
    a1[2] = a[2] + a[6];
964
12.8M
    a1[3] = a[3] + a[7];
965
966
12.8M
    t03 = ((sword128)a[0] * (2 * a[3])) + ((sword128)a[1] * (2 * a[2]));
967
12.8M
    t04 = ((sword128)a[1] * (2 * a[3])) + ((sword128)a[2] * a[2]);
968
12.8M
    t04 += t03 >> 56;
969
12.8M
    t03 &= 0xffffffffffffffL;
970
12.8M
    t13 = ((sword128)a1[0] * (2 * a1[3])) + ((sword128)a1[1] * (2 * a1[2]));
971
12.8M
    t14 = ((sword128)a1[1] * (2 * a1[3])) + ((sword128)a1[2] * a1[2]);
972
12.8M
    t14 += t13 >> 56;
973
12.8M
    t13 &= 0xffffffffffffffL;
974
975
12.8M
    t0 = ((sword128)a[0] * a[0]) + ((sword128)a[4] * a[4]) + t14 + -t04;
976
12.8M
    t1 = ((sword128)a[0] * (2 * a[1])) + ((sword128)a[4] * (2 * a[5]))
977
12.8M
       + ((sword128)a1[2] * (2 * a1[3])) - ((sword128)a[2] * (2 * a[3]));
978
12.8M
    o = (sword64)(t0  >> 56); t1 += o; t0 &= 0xffffffffffffffL;
979
12.8M
    t2 = ((sword128)a[0] * (2 * a[2])) + ((sword128)a[1] * a[1])
980
12.8M
       + ((sword128)a[4] * (2 * a[6])) + ((sword128)a[5] * a[5])
981
12.8M
       + ((sword128)a1[3] * a1[3]) - ((sword128)a[3] * a[3]);
982
12.8M
    o = (sword64)(t1  >> 56); t2 += o; t1 &= 0xffffffffffffffL;
983
12.8M
    t3 = t03 + ((sword128)a[4] * (2 * a[7])) + ((sword128)a[5] * (2 * a[6]));
984
12.8M
    o = (sword64)(t2  >> 56); t3 += o; t2 &= 0xffffffffffffffL;
985
12.8M
    t4 = ((sword128)a[5] * (2 * a[7])) + ((sword128)a[6] * a[6])
986
12.8M
       + ((sword128)a1[0] * a1[0]) - ((sword128)a[0] * a[0]) + t14;
987
12.8M
    o = (sword64)(t3  >> 56); t4 += o; t3 &= 0xffffffffffffffL;
988
12.8M
    t5 = ((sword128)a[6] * (2 * a[7])) + ((sword128)a1[0] * (2 * a1[1]))
989
12.8M
       - ((sword128)a[0] * (2 * a[1])) + ((sword128)a1[2] * (2 * a1[3]));
990
12.8M
    o = (sword64)(t4  >> 56); t5 += o; t4 &= 0xffffffffffffffL;
991
12.8M
    t6 = ((sword128)a[7] * a[7]) + ((sword128)a1[0] * (2 * a1[2]))
992
12.8M
       + ((sword128)a1[1] * a1[1]) - ((sword128)a[0] * (2 * a[2]))
993
12.8M
       - ((sword128)a[1] * a[1]) + ((sword128)a1[3] * a1[3]);
994
12.8M
    o = (sword64)(t5  >> 56); t6 += o; t5 &= 0xffffffffffffffL;
995
12.8M
    t7 = t13 + -t03;
996
12.8M
    o = (sword64)(t6  >> 56); t7 += o; t6 &= 0xffffffffffffffL;
997
12.8M
    o = (sword64)(t7 >> 56); t0  += o;
998
12.8M
                   t4  += o; t7 &= 0xffffffffffffffL;
999
1000
    /* Store */
1001
12.8M
    r[0] = (sword64)t0;
1002
12.8M
    r[1] = (sword64)t1;
1003
12.8M
    r[2] = (sword64)t2;
1004
12.8M
    r[3] = (sword64)t3;
1005
12.8M
    r[4] = (sword64)t4;
1006
12.8M
    r[5] = (sword64)t5;
1007
12.8M
    r[6] = (sword64)t6;
1008
12.8M
    r[7] = (sword64)t7;
1009
12.8M
}
1010
1011
/* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
1012
 * Constant time implementation - using Fermat's little theorem:
1013
 *   a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
1014
 * For curve448: p - 2 = 2^448 - 2^224 - 3
1015
 *
1016
 * r  [in]  Field element to hold result.
1017
 * a  [in]  Field element to invert.
1018
 */
1019
void fe448_invert(sword64* r, const sword64* a)
1020
8.37k
{
1021
8.37k
    sword64 t1[8];
1022
8.37k
    sword64 t2[8];
1023
8.37k
    sword64 t3[8];
1024
8.37k
    sword64 t4[8];
1025
8.37k
    int i;
1026
1027
8.37k
    fe448_sqr(t1, a);
1028
    /* t1 = 2 */
1029
8.37k
    fe448_mul(t1, t1, a);
1030
    /* t1 = 3 */
1031
16.7k
    fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
1032
    /* t2 = c */
1033
8.37k
    fe448_mul(t3, t2, a);
1034
    /* t3 = d */
1035
8.37k
    fe448_mul(t1, t2, t1);
1036
    /* t1 = f */
1037
8.37k
    fe448_sqr(t2, t1);
1038
    /* t2 = 1e */
1039
8.37k
    fe448_mul(t4, t2, a);
1040
    /* t4 = 1f */
1041
41.8k
    fe448_sqr(t2, t4); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
1042
    /* t2 = 3e0 */
1043
8.37k
    fe448_mul(t1, t2, t4);
1044
    /* t1 = 3ff */
1045
83.7k
    fe448_sqr(t2, t1); for (i = 1; i < 10; ++i) fe448_sqr(t2, t2);
1046
    /* t2 = ffc00 */
1047
8.37k
    fe448_mul(t1, t2, t1);
1048
    /* t1 = fffff */
1049
41.8k
    fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
1050
    /* t2 = 1ffffe0 */
1051
8.37k
    fe448_mul(t1, t2, t4);
1052
    /* t1 = 1ffffff */
1053
209k
    fe448_sqr(t2, t1); for (i = 1; i < 25; ++i) fe448_sqr(t2, t2);
1054
    /* t2 = 3fffffe000000 */
1055
8.37k
    fe448_mul(t1, t2, t1);
1056
    /* t1 = 3ffffffffffff */
1057
41.8k
    fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
1058
    /* t2 = 7fffffffffffe0 */
1059
8.37k
    fe448_mul(t1, t2, t4);
1060
    /* t1 = 7fffffffffffff */
1061
460k
    fe448_sqr(t2, t1); for (i = 1; i < 55; ++i) fe448_sqr(t2, t2);
1062
    /* t2 = 3fffffffffffff80000000000000 */
1063
8.37k
    fe448_mul(t1, t2, t1);
1064
    /* t1 = 3fffffffffffffffffffffffffff */
1065
921k
    fe448_sqr(t2, t1); for (i = 1; i < 110; ++i) fe448_sqr(t2, t2);
1066
    /* t2 = fffffffffffffffffffffffffffc000000000000000000000000000 */
1067
8.37k
    fe448_mul(t1, t2, t1);
1068
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff */
1069
33.4k
    fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
1070
    /* t2 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff0 */
1071
8.37k
    fe448_mul(t3, t3, t2);
1072
    /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
1073
8.37k
    fe448_mul(t1, t3, a);
1074
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
1075
1.87M
    fe448_sqr(t1, t1); for (i = 1; i < 224; ++i) fe448_sqr(t1, t1);
1076
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000000000000000000000000000000000 */
1077
8.37k
    fe448_mul(r, t3, t1);
1078
    /* r = fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
1079
8.37k
}
1080
1081
/* Scalar multiply the point by a number. r = n.a
1082
 * Uses Montgomery ladder and only requires the x-ordinate.
1083
 *
1084
 * r  [in]  Field element to hold result.
1085
 * n  [in]  Scalar as an array of bytes.
1086
 * a  [in]  Point to multiply - x-ordinate only.
1087
 */
1088
int curve448(byte* r, const byte* n, const byte* a)
1089
1.60k
{
1090
1.60k
    sword64 x1[8];
1091
1.60k
    sword64 x2[8];
1092
1.60k
    sword64 z2[8];
1093
1.60k
    sword64 x3[8];
1094
1.60k
    sword64 z3[8];
1095
1.60k
    sword64 t0[8];
1096
1.60k
    sword64 t1[8];
1097
1.60k
    int i;
1098
1.60k
    unsigned int swap;
1099
1100
1.60k
    fe448_from_bytes(x1, a);
1101
1.60k
    fe448_1(x2);
1102
1.60k
    fe448_0(z2);
1103
1.60k
    fe448_copy(x3, x1);
1104
1.60k
    fe448_1(z3);
1105
1106
1.60k
    swap = 0;
1107
720k
    for (i = 447; i >= 0; --i) {
1108
719k
        unsigned int b = (n[i >> 3] >> (i & 7)) & 1;
1109
719k
        swap ^= b;
1110
719k
        fe448_cswap(x2, x3, (int)swap);
1111
719k
        fe448_cswap(z2, z3, (int)swap);
1112
719k
        swap = b;
1113
1114
        /* Montgomery Ladder - double and add */
1115
719k
        fe448_add(t0, x2, z2);
1116
719k
        fe448_reduce(t0);
1117
719k
        fe448_add(t1, x3, z3);
1118
719k
        fe448_reduce(t1);
1119
719k
        fe448_sub(x2, x2, z2);
1120
719k
        fe448_sub(x3, x3, z3);
1121
719k
        fe448_mul(t1, t1, x2);
1122
719k
        fe448_mul(z3, x3, t0);
1123
719k
        fe448_sqr(t0, t0);
1124
719k
        fe448_sqr(x2, x2);
1125
719k
        fe448_add(x3, z3, t1);
1126
719k
        fe448_reduce(x3);
1127
719k
        fe448_sqr(x3, x3);
1128
719k
        fe448_sub(z3, z3, t1);
1129
719k
        fe448_sqr(z3, z3);
1130
719k
        fe448_mul(z3, z3, x1);
1131
719k
        fe448_sub(t1, t0, x2);
1132
719k
        fe448_mul(x2, t0, x2);
1133
719k
        fe448_mul39081(z2, t1);
1134
719k
        fe448_add(z2, t0, z2);
1135
719k
        fe448_mul(z2, z2, t1);
1136
719k
    }
1137
    /* Last two bits are 0 - no final swap check required. */
1138
1139
1.60k
    fe448_invert(z2, z2);
1140
1.60k
    fe448_mul(x2, x2, z2);
1141
1.60k
    fe448_to_bytes(r, x2);
1142
1143
1.60k
    return 0;
1144
1.60k
}
1145
1146
#ifdef HAVE_ED448
1147
/* Check whether field element is not 0.
1148
 * Must convert to a normalized form before checking.
1149
 *
1150
 * a  [in]  Field element.
1151
 * returns 0 when zero, and any other value otherwise.
1152
 */
1153
int fe448_isnonzero(const sword64* a)
1154
4.71k
{
1155
4.71k
    byte b[56];
1156
4.71k
    int i;
1157
4.71k
    byte c = 0;
1158
4.71k
    fe448_to_bytes(b, a);
1159
268k
    for (i = 0; i < 56; i++)
1160
263k
        c |= b[i];
1161
4.71k
    return c;
1162
4.71k
}
1163
1164
/* Check whether field element is negative.
1165
 * Must convert to a normalized form before checking.
1166
 *
1167
 * a  [in]  Field element.
1168
 * returns 1 when negative, and 0 otherwise.
1169
 */
1170
int fe448_isnegative(const sword64* a)
1171
11.4k
{
1172
11.4k
    byte b[56];
1173
11.4k
    fe448_to_bytes(b, a);
1174
11.4k
    return b[0] & 1;
1175
11.4k
}
1176
1177
/* Negates the field element. r = -a
1178
 *
1179
 * r  [in]  Field element to hold result.
1180
 * a  [in]  Field element.
1181
 */
1182
void fe448_neg(sword64* r, const sword64* a)
1183
574k
{
1184
574k
    r[0] = -a[0];
1185
574k
    r[1] = -a[1];
1186
574k
    r[2] = -a[2];
1187
574k
    r[3] = -a[3];
1188
574k
    r[4] = -a[4];
1189
574k
    r[5] = -a[5];
1190
574k
    r[6] = -a[6];
1191
574k
    r[7] = -a[7];
1192
574k
}
1193
1194
/* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
1195
 * Used for calculating y-ordinate from x-ordinate for Ed448.
1196
 *
1197
 * r  [in]  Field element to hold result.
1198
 * a  [in]  Field element to exponentiate.
1199
 */
1200
void fe448_pow_2_446_222_1(sword64* r, const sword64* a)
1201
4.71k
{
1202
4.71k
    sword64 t1[8];
1203
4.71k
    sword64 t2[8];
1204
4.71k
    sword64 t3[8];
1205
4.71k
    sword64 t4[8];
1206
4.71k
    sword64 t5[8];
1207
4.71k
    int i;
1208
1209
4.71k
    fe448_sqr(t3, a);
1210
    /* t3 = 2 */
1211
4.71k
    fe448_mul(t1, t3, a);
1212
    /* t1 = 3 */
1213
4.71k
    fe448_sqr(t5, t1);
1214
    /* t5 = 6 */
1215
4.71k
    fe448_mul(t5, t5, a);
1216
    /* t5 = 7 */
1217
9.42k
    fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
1218
    /* t2 = c */
1219
4.71k
    fe448_mul(t3, t2, t3);
1220
    /* t3 = e */
1221
4.71k
    fe448_mul(t1, t2, t1);
1222
    /* t1 = f */
1223
14.1k
    fe448_sqr(t2, t1); for (i = 1; i < 3; ++i) fe448_sqr(t2, t2);
1224
    /* t2 = 78 */
1225
4.71k
    fe448_mul(t5, t2, t5);
1226
    /* t5 = 7f */
1227
18.8k
    fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
1228
    /* t2 = f0 */
1229
4.71k
    fe448_mul(t1, t2, t1);
1230
    /* t1 = ff */
1231
4.71k
    fe448_mul(t3, t3, t2);
1232
    /* t3 = fe */
1233
32.9k
    fe448_sqr(t2, t1); for (i = 1; i < 7; ++i) fe448_sqr(t2, t2);
1234
    /* t2 = 7f80 */
1235
4.71k
    fe448_mul(t5, t2, t5);
1236
    /* t5 = 7fff */
1237
37.7k
    fe448_sqr(t2, t1); for (i = 1; i < 8; ++i) fe448_sqr(t2, t2);
1238
    /* t2 = ff00 */
1239
4.71k
    fe448_mul(t1, t2, t1);
1240
    /* t1 = ffff */
1241
4.71k
    fe448_mul(t3, t3, t2);
1242
    /* t3 = fffe */
1243
70.7k
    fe448_sqr(t2, t5); for (i = 1; i < 15; ++i) fe448_sqr(t2, t2);
1244
    /* t2 = 3fff8000 */
1245
4.71k
    fe448_mul(t5, t2, t5);
1246
    /* t5 = 3fffffff */
1247
75.4k
    fe448_sqr(t2, t1); for (i = 1; i < 16; ++i) fe448_sqr(t2, t2);
1248
    /* t2 = ffff0000 */
1249
4.71k
    fe448_mul(t1, t2, t1);
1250
    /* t1 = ffffffff */
1251
4.71k
    fe448_mul(t3, t3, t2);
1252
    /* t3 = fffffffe */
1253
150k
    fe448_sqr(t2, t1); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
1254
    /* t2 = ffffffff00000000 */
1255
4.71k
    fe448_mul(t2, t2, t1);
1256
    /* t2 = ffffffffffffffff */
1257
301k
    fe448_sqr(t1, t2); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
1258
    /* t1 = ffffffffffffffff0000000000000000 */
1259
4.71k
    fe448_mul(t1, t1, t2);
1260
    /* t1 = ffffffffffffffffffffffffffffffff */
1261
301k
    fe448_sqr(t1, t1); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
1262
    /* t1 = ffffffffffffffffffffffffffffffff0000000000000000 */
1263
4.71k
    fe448_mul(t4, t1, t2);
1264
    /* t4 = ffffffffffffffffffffffffffffffffffffffffffffffff */
1265
150k
    fe448_sqr(t2, t4); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
1266
    /* t2 = ffffffffffffffffffffffffffffffffffffffffffffffff00000000 */
1267
4.71k
    fe448_mul(t3, t3, t2);
1268
    /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
1269
905k
    fe448_sqr(t1, t3); for (i = 1; i < 192; ++i) fe448_sqr(t1, t1);
1270
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe000000000000000000000000000000000000000000000000 */
1271
4.71k
    fe448_mul(t1, t1, t4);
1272
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffff */
1273
141k
    fe448_sqr(t1, t1); for (i = 1; i < 30; ++i) fe448_sqr(t1, t1);
1274
    /* t1 = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffc0000000 */
1275
4.71k
    fe448_mul(r, t5, t1);
1276
    /* r = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff */
1277
4.71k
}
1278
1279
/* Constant time, conditional move of b into a.
1280
 * a is not changed if the condition is 0.
1281
 *
1282
 * a  A field element.
1283
 * b  A field element.
1284
 * c  If 1 then copy and if 0 then don't copy.
1285
 */
1286
void fe448_cmov(sword64* a, const sword64* b, int c)
1287
9.64M
{
1288
9.64M
    sword64 m = -(sword64)c;
1289
9.64M
    sword64 t0 = m & (a[0] ^ b[0]);
1290
9.64M
    sword64 t1 = m & (a[1] ^ b[1]);
1291
9.64M
    sword64 t2 = m & (a[2] ^ b[2]);
1292
9.64M
    sword64 t3 = m & (a[3] ^ b[3]);
1293
9.64M
    sword64 t4 = m & (a[4] ^ b[4]);
1294
9.64M
    sword64 t5 = m & (a[5] ^ b[5]);
1295
9.64M
    sword64 t6 = m & (a[6] ^ b[6]);
1296
9.64M
    sword64 t7 = m & (a[7] ^ b[7]);
1297
1298
9.64M
    a[0] ^= t0;
1299
9.64M
    a[1] ^= t1;
1300
9.64M
    a[2] ^= t2;
1301
9.64M
    a[3] ^= t3;
1302
9.64M
    a[4] ^= t4;
1303
9.64M
    a[5] ^= t5;
1304
9.64M
    a[6] ^= t6;
1305
9.64M
    a[7] ^= t7;
1306
9.64M
}
1307
1308
#endif /* HAVE_ED448 */
1309
#else
1310
1311
/* Initialize the field element operations.
1312
 */
1313
void fe448_init(void)
1314
{
1315
}
1316
1317
/* Convert the field element from a byte array to an array of 28-bits.
1318
 *
1319
 * r  [in]  Array to encode into.
1320
 * b  [in]  Byte array.
1321
 */
1322
void fe448_from_bytes(sword32* r, const unsigned char* b)
1323
{
1324
    r[ 0] = (sword32)(
1325
             (((word32)((b[ 0]        ) >>  0)) <<  0)
1326
          |  (((word32)((b[ 1]        ) >>  0)) <<  8)
1327
          |  (((word32)((b[ 2]        ) >>  0)) << 16)
1328
          | ((((word32)((b[ 3] & 0xf )) >>  0)) << 24));
1329
    r[ 1] = (sword32)(
1330
             (((word32)((b[ 3]        ) >>  4)) <<  0)
1331
          |  (((word32)((b[ 4]        ) >>  0)) <<  4)
1332
          |  (((word32)((b[ 5]        ) >>  0)) << 12)
1333
          |  (((word32)((b[ 6]        ) >>  0)) << 20));
1334
    r[ 2] = (sword32)(
1335
             (((word32)((b[ 7]        ) >>  0)) <<  0)
1336
          |  (((word32)((b[ 8]        ) >>  0)) <<  8)
1337
          |  (((word32)((b[ 9]        ) >>  0)) << 16)
1338
          | ((((word32)((b[10] & 0xf )) >>  0)) << 24));
1339
    r[ 3] = (sword32)(
1340
             (((word32)((b[10]        ) >>  4)) <<  0)
1341
          |  (((word32)((b[11]        ) >>  0)) <<  4)
1342
          |  (((word32)((b[12]        ) >>  0)) << 12)
1343
          |  (((word32)((b[13]        ) >>  0)) << 20));
1344
    r[ 4] = (sword32)(
1345
             (((word32)((b[14]        ) >>  0)) <<  0)
1346
          |  (((word32)((b[15]        ) >>  0)) <<  8)
1347
          |  (((word32)((b[16]        ) >>  0)) << 16)
1348
          | ((((word32)((b[17] & 0xf )) >>  0)) << 24));
1349
    r[ 5] = (sword32)(
1350
             (((word32)((b[17]        ) >>  4)) <<  0)
1351
          |  (((word32)((b[18]        ) >>  0)) <<  4)
1352
          |  (((word32)((b[19]        ) >>  0)) << 12)
1353
          |  (((word32)((b[20]        ) >>  0)) << 20));
1354
    r[ 6] = (sword32)(
1355
             (((word32)((b[21]        ) >>  0)) <<  0)
1356
          |  (((word32)((b[22]        ) >>  0)) <<  8)
1357
          |  (((word32)((b[23]        ) >>  0)) << 16)
1358
          | ((((word32)((b[24] & 0xf )) >>  0)) << 24));
1359
    r[ 7] = (sword32)(
1360
             (((word32)((b[24]        ) >>  4)) <<  0)
1361
          |  (((word32)((b[25]        ) >>  0)) <<  4)
1362
          |  (((word32)((b[26]        ) >>  0)) << 12)
1363
          |  (((word32)((b[27]        ) >>  0)) << 20));
1364
    r[ 8] = (sword32)(
1365
             (((word32)((b[28]        ) >>  0)) <<  0)
1366
          |  (((word32)((b[29]        ) >>  0)) <<  8)
1367
          |  (((word32)((b[30]        ) >>  0)) << 16)
1368
          | ((((word32)((b[31] & 0xf )) >>  0)) << 24));
1369
    r[ 9] = (sword32)(
1370
             (((word32)((b[31]        ) >>  4)) <<  0)
1371
          |  (((word32)((b[32]        ) >>  0)) <<  4)
1372
          |  (((word32)((b[33]        ) >>  0)) << 12)
1373
          |  (((word32)((b[34]        ) >>  0)) << 20));
1374
    r[10] = (sword32)(
1375
             (((word32)((b[35]        ) >>  0)) <<  0)
1376
          |  (((word32)((b[36]        ) >>  0)) <<  8)
1377
          |  (((word32)((b[37]        ) >>  0)) << 16)
1378
          | ((((word32)((b[38] & 0xf )) >>  0)) << 24));
1379
    r[11] = (sword32)(
1380
             (((word32)((b[38]        ) >>  4)) <<  0)
1381
          |  (((word32)((b[39]        ) >>  0)) <<  4)
1382
          |  (((word32)((b[40]        ) >>  0)) << 12)
1383
          |  (((word32)((b[41]        ) >>  0)) << 20));
1384
    r[12] = (sword32)(
1385
             (((word32)((b[42]        ) >>  0)) <<  0)
1386
          |  (((word32)((b[43]        ) >>  0)) <<  8)
1387
          |  (((word32)((b[44]        ) >>  0)) << 16)
1388
          | ((((word32)((b[45] & 0xf )) >>  0)) << 24));
1389
    r[13] = (sword32)(
1390
             (((word32)((b[45]        ) >>  4)) <<  0)
1391
          |  (((word32)((b[46]        ) >>  0)) <<  4)
1392
          |  (((word32)((b[47]        ) >>  0)) << 12)
1393
          |  (((word32)((b[48]        ) >>  0)) << 20));
1394
    r[14] = (sword32)(
1395
             (((word32)((b[49]        ) >>  0)) <<  0)
1396
          |  (((word32)((b[50]        ) >>  0)) <<  8)
1397
          |  (((word32)((b[51]        ) >>  0)) << 16)
1398
          | ((((word32)((b[52] & 0xf )) >>  0)) << 24));
1399
    r[15] = (sword32)(
1400
             (((word32)((b[52]        ) >>  4)) <<  0)
1401
          |  (((word32)((b[53]        ) >>  0)) <<  4)
1402
          |  (((word32)((b[54]        ) >>  0)) << 12)
1403
          |  (((word32)((b[55]        ) >>  0)) << 20));
1404
}
1405
1406
/* Convert the field element to a byte array from an array of 28-bits.
1407
 *
1408
 * b  [in]  Byte array.
1409
 * a  [in]  Array to encode into.
1410
 */
1411
void fe448_to_bytes(unsigned char* b, const sword32* a)
1412
{
1413
    sword32 t;
1414
    /* Mod */
1415
    sword32 in0 = a[0];
1416
    sword32 in1 = a[1];
1417
    sword32 in2 = a[2];
1418
    sword32 in3 = a[3];
1419
    sword32 in4 = a[4];
1420
    sword32 in5 = a[5];
1421
    sword32 in6 = a[6];
1422
    sword32 in7 = a[7];
1423
    sword32 in8 = a[8];
1424
    sword32 in9 = a[9];
1425
    sword32 in10 = a[10];
1426
    sword32 in11 = a[11];
1427
    sword32 in12 = a[12];
1428
    sword32 in13 = a[13];
1429
    sword32 in14 = a[14];
1430
    sword32 in15 = a[15];
1431
    sword32 o = in15 >> 28;
1432
    in15 -= (sword32)((word32)o << 28);
1433
    in0 += o;
1434
    in8 += o;
1435
    o = (in0 + 1) >> 28;
1436
    o = (o + in1) >> 28;
1437
    o = (o + in2) >> 28;
1438
    o = (o + in3) >> 28;
1439
    o = (o + in4) >> 28;
1440
    o = (o + in5) >> 28;
1441
    o = (o + in6) >> 28;
1442
    o = (o + in7) >> 28;
1443
    o = (o + in8 + 1) >> 28;
1444
    o = (o + in9) >> 28;
1445
    o = (o + in10) >> 28;
1446
    o = (o + in11) >> 28;
1447
    o = (o + in12) >> 28;
1448
    o = (o + in13) >> 28;
1449
    o = (o + in14) >> 28;
1450
    o = (o + in15) >> 28;
1451
    in0 += o;
1452
    in8 += o;
1453
    in15 -= (sword32)((word32)o << 28);
1454
    o = (sword32)(in0  >> 28); in1  += o;
1455
    t = (sword32)((word32)o << 28); in0  -= t;
1456
    o = (sword32)(in1  >> 28); in2  += o;
1457
    t = (sword32)((word32)o << 28); in1  -= t;
1458
    o = (sword32)(in2  >> 28); in3  += o;
1459
    t = (sword32)((word32)o << 28); in2  -= t;
1460
    o = (sword32)(in3  >> 28); in4  += o;
1461
    t = (sword32)((word32)o << 28); in3  -= t;
1462
    o = (sword32)(in4  >> 28); in5  += o;
1463
    t = (sword32)((word32)o << 28); in4  -= t;
1464
    o = (sword32)(in5  >> 28); in6  += o;
1465
    t = (sword32)((word32)o << 28); in5  -= t;
1466
    o = (sword32)(in6  >> 28); in7  += o;
1467
    t = (sword32)((word32)o << 28); in6  -= t;
1468
    o = (sword32)(in7  >> 28); in8  += o;
1469
    t = (sword32)((word32)o << 28); in7  -= t;
1470
    o = (sword32)(in8  >> 28); in9  += o;
1471
    t = (sword32)((word32)o << 28); in8  -= t;
1472
    o = (sword32)(in9  >> 28); in10 += o;
1473
    t = (sword32)((word32)o << 28); in9  -= t;
1474
    o = (sword32)(in10 >> 28); in11 += o;
1475
    t = (sword32)((word32)o << 28); in10 -= t;
1476
    o = (sword32)(in11 >> 28); in12 += o;
1477
    t = (sword32)((word32)o << 28); in11 -= t;
1478
    o = (sword32)(in12 >> 28); in13 += o;
1479
    t = (sword32)((word32)o << 28); in12 -= t;
1480
    o = (sword32)(in13 >> 28); in14 += o;
1481
    t = (sword32)((word32)o << 28); in13 -= t;
1482
    o = (sword32)(in14 >> 28); in15 += o;
1483
    t = (sword32)((word32)o << 28); in14 -= t;
1484
    o = (sword32)(in15 >> 28); in0  += o;
1485
                               in8  += o;
1486
    t = (sword32)((word32)o << 28); in15 -= t;
1487
1488
    /* Output as bytes */
1489
    b[ 0] = (byte)(in0  >>  0);
1490
    b[ 1] = (byte)(in0  >>  8);
1491
    b[ 2] = (byte)(in0  >> 16);
1492
    b[ 3] = (byte)((byte)(in0  >> 24) + (byte)((byte)(in1  >>  0) <<  4));
1493
    b[ 4] = (byte)(in1  >>  4);
1494
    b[ 5] = (byte)(in1  >> 12);
1495
    b[ 6] = (byte)(in1  >> 20);
1496
    b[ 7] = (byte)(in2  >>  0);
1497
    b[ 8] = (byte)(in2  >>  8);
1498
    b[ 9] = (byte)(in2  >> 16);
1499
    b[10] = (byte)((byte)(in2  >> 24) + (byte)((byte)(in3  >>  0) <<  4));
1500
    b[11] = (byte)(in3  >>  4);
1501
    b[12] = (byte)(in3  >> 12);
1502
    b[13] = (byte)(in3  >> 20);
1503
    b[14] = (byte)(in4  >>  0);
1504
    b[15] = (byte)(in4  >>  8);
1505
    b[16] = (byte)(in4  >> 16);
1506
    b[17] = (byte)((byte)(in4  >> 24) + (byte)((byte)(in5  >>  0) <<  4));
1507
    b[18] = (byte)(in5  >>  4);
1508
    b[19] = (byte)(in5  >> 12);
1509
    b[20] = (byte)(in5  >> 20);
1510
    b[21] = (byte)(in6  >>  0);
1511
    b[22] = (byte)(in6  >>  8);
1512
    b[23] = (byte)(in6  >> 16);
1513
    b[24] = (byte)((byte)(in6  >> 24) + (byte)((byte)(in7  >>  0) <<  4));
1514
    b[25] = (byte)(in7  >>  4);
1515
    b[26] = (byte)(in7  >> 12);
1516
    b[27] = (byte)(in7  >> 20);
1517
    b[28] = (byte)(in8  >>  0);
1518
    b[29] = (byte)(in8  >>  8);
1519
    b[30] = (byte)(in8  >> 16);
1520
    b[31] = (byte)((byte)(in8  >> 24) + (byte)((byte)(in9  >>  0) <<  4));
1521
    b[32] = (byte)(in9  >>  4);
1522
    b[33] = (byte)(in9  >> 12);
1523
    b[34] = (byte)(in9  >> 20);
1524
    b[35] = (byte)(in10 >>  0);
1525
    b[36] = (byte)(in10 >>  8);
1526
    b[37] = (byte)(in10 >> 16);
1527
    b[38] = (byte)((byte)(in10 >> 24) + (byte)((byte)(in11 >>  0) <<  4));
1528
    b[39] = (byte)(in11 >>  4);
1529
    b[40] = (byte)(in11 >> 12);
1530
    b[41] = (byte)(in11 >> 20);
1531
    b[42] = (byte)(in12 >>  0);
1532
    b[43] = (byte)(in12 >>  8);
1533
    b[44] = (byte)(in12 >> 16);
1534
    b[45] = (byte)((byte)(in12 >> 24) + (byte)((byte)(in13 >>  0) <<  4));
1535
    b[46] = (byte)(in13 >>  4);
1536
    b[47] = (byte)(in13 >> 12);
1537
    b[48] = (byte)(in13 >> 20);
1538
    b[49] = (byte)(in14 >>  0);
1539
    b[50] = (byte)(in14 >>  8);
1540
    b[51] = (byte)(in14 >> 16);
1541
    b[52] = (byte)((byte)(in14 >> 24) + (byte)((byte)(in15 >>  0) <<  4));
1542
    b[53] = (byte)(in15 >>  4);
1543
    b[54] = (byte)(in15 >> 12);
1544
    b[55] = (byte)(in15 >> 20);
1545
}
1546
1547
/* Set the field element to 0.
1548
 *
1549
 * a  [in]  Field element.
1550
 */
1551
void fe448_1(sword32* a)
1552
{
1553
    a[0] = 1;
1554
    a[1] = 0;
1555
    a[2] = 0;
1556
    a[3] = 0;
1557
    a[4] = 0;
1558
    a[5] = 0;
1559
    a[6] = 0;
1560
    a[7] = 0;
1561
    a[8] = 0;
1562
    a[9] = 0;
1563
    a[10] = 0;
1564
    a[11] = 0;
1565
    a[12] = 0;
1566
    a[13] = 0;
1567
    a[14] = 0;
1568
    a[15] = 0;
1569
}
1570
1571
/* Set the field element to 0.
1572
 *
1573
 * a  [in]  Field element.
1574
 */
1575
void fe448_0(sword32* a)
1576
{
1577
    a[0] = 0;
1578
    a[1] = 0;
1579
    a[2] = 0;
1580
    a[3] = 0;
1581
    a[4] = 0;
1582
    a[5] = 0;
1583
    a[6] = 0;
1584
    a[7] = 0;
1585
    a[8] = 0;
1586
    a[9] = 0;
1587
    a[10] = 0;
1588
    a[11] = 0;
1589
    a[12] = 0;
1590
    a[13] = 0;
1591
    a[14] = 0;
1592
    a[15] = 0;
1593
}
1594
1595
/* Copy one field element into another: d = a.
1596
 *
1597
 * d  [in]  Destination field element.
1598
 * a  [in]  Source field element.
1599
 */
1600
void fe448_copy(sword32* d, const sword32* a)
1601
{
1602
    d[0] = a[0];
1603
    d[1] = a[1];
1604
    d[2] = a[2];
1605
    d[3] = a[3];
1606
    d[4] = a[4];
1607
    d[5] = a[5];
1608
    d[6] = a[6];
1609
    d[7] = a[7];
1610
    d[8] = a[8];
1611
    d[9] = a[9];
1612
    d[10] = a[10];
1613
    d[11] = a[11];
1614
    d[12] = a[12];
1615
    d[13] = a[13];
1616
    d[14] = a[14];
1617
    d[15] = a[15];
1618
}
1619
1620
/* Conditionally swap the elements.
1621
 * Constant time implementation.
1622
 *
1623
 * a  [in]  First field element.
1624
 * b  [in]  Second field element.
1625
 * c  [in]  Swap when 1. Valid values: 0, 1.
1626
 */
1627
static void fe448_cswap(sword32* a, sword32* b, int c)
1628
{
1629
    sword32 mask = -(sword32)c;
1630
    sword32 t0 = (a[0] ^ b[0]) & mask;
1631
    sword32 t1 = (a[1] ^ b[1]) & mask;
1632
    sword32 t2 = (a[2] ^ b[2]) & mask;
1633
    sword32 t3 = (a[3] ^ b[3]) & mask;
1634
    sword32 t4 = (a[4] ^ b[4]) & mask;
1635
    sword32 t5 = (a[5] ^ b[5]) & mask;
1636
    sword32 t6 = (a[6] ^ b[6]) & mask;
1637
    sword32 t7 = (a[7] ^ b[7]) & mask;
1638
    sword32 t8 = (a[8] ^ b[8]) & mask;
1639
    sword32 t9 = (a[9] ^ b[9]) & mask;
1640
    sword32 t10 = (a[10] ^ b[10]) & mask;
1641
    sword32 t11 = (a[11] ^ b[11]) & mask;
1642
    sword32 t12 = (a[12] ^ b[12]) & mask;
1643
    sword32 t13 = (a[13] ^ b[13]) & mask;
1644
    sword32 t14 = (a[14] ^ b[14]) & mask;
1645
    sword32 t15 = (a[15] ^ b[15]) & mask;
1646
    a[0] ^= t0;
1647
    a[1] ^= t1;
1648
    a[2] ^= t2;
1649
    a[3] ^= t3;
1650
    a[4] ^= t4;
1651
    a[5] ^= t5;
1652
    a[6] ^= t6;
1653
    a[7] ^= t7;
1654
    a[8] ^= t8;
1655
    a[9] ^= t9;
1656
    a[10] ^= t10;
1657
    a[11] ^= t11;
1658
    a[12] ^= t12;
1659
    a[13] ^= t13;
1660
    a[14] ^= t14;
1661
    a[15] ^= t15;
1662
    b[0] ^= t0;
1663
    b[1] ^= t1;
1664
    b[2] ^= t2;
1665
    b[3] ^= t3;
1666
    b[4] ^= t4;
1667
    b[5] ^= t5;
1668
    b[6] ^= t6;
1669
    b[7] ^= t7;
1670
    b[8] ^= t8;
1671
    b[9] ^= t9;
1672
    b[10] ^= t10;
1673
    b[11] ^= t11;
1674
    b[12] ^= t12;
1675
    b[13] ^= t13;
1676
    b[14] ^= t14;
1677
    b[15] ^= t15;
1678
}
1679
1680
/* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
1681
 *
1682
 * r  [in]  Field element to hold sum.
1683
 * a  [in]  Field element to add.
1684
 * b  [in]  Field element to add.
1685
 */
1686
void fe448_add(sword32* r, const sword32* a, const sword32* b)
1687
{
1688
    r[0] = a[0] + b[0];
1689
    r[1] = a[1] + b[1];
1690
    r[2] = a[2] + b[2];
1691
    r[3] = a[3] + b[3];
1692
    r[4] = a[4] + b[4];
1693
    r[5] = a[5] + b[5];
1694
    r[6] = a[6] + b[6];
1695
    r[7] = a[7] + b[7];
1696
    r[8] = a[8] + b[8];
1697
    r[9] = a[9] + b[9];
1698
    r[10] = a[10] + b[10];
1699
    r[11] = a[11] + b[11];
1700
    r[12] = a[12] + b[12];
1701
    r[13] = a[13] + b[13];
1702
    r[14] = a[14] + b[14];
1703
    r[15] = a[15] + b[15];
1704
}
1705
1706
/* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
1707
 *
1708
 * r  [in]  Field element to hold difference.
1709
 * a  [in]  Field element to subtract from.
1710
 * b  [in]  Field element to subtract.
1711
 */
1712
void fe448_sub(sword32* r, const sword32* a, const sword32* b)
1713
{
1714
    r[0] = a[0] - b[0];
1715
    r[1] = a[1] - b[1];
1716
    r[2] = a[2] - b[2];
1717
    r[3] = a[3] - b[3];
1718
    r[4] = a[4] - b[4];
1719
    r[5] = a[5] - b[5];
1720
    r[6] = a[6] - b[6];
1721
    r[7] = a[7] - b[7];
1722
    r[8] = a[8] - b[8];
1723
    r[9] = a[9] - b[9];
1724
    r[10] = a[10] - b[10];
1725
    r[11] = a[11] - b[11];
1726
    r[12] = a[12] - b[12];
1727
    r[13] = a[13] - b[13];
1728
    r[14] = a[14] - b[14];
1729
    r[15] = a[15] - b[15];
1730
}
1731
1732
void fe448_reduce(sword32* a)
1733
{
1734
    sword64 o;
1735
1736
    o = a[0 ] >> 28; a[1 ] += (sword32)o; a[0 ] -= (sword32)((word32)o << 28);
1737
    o = a[1 ] >> 28; a[2 ] += (sword32)o; a[1 ] -= (sword32)((word32)o << 28);
1738
    o = a[2 ] >> 28; a[3 ] += (sword32)o; a[2 ] -= (sword32)((word32)o << 28);
1739
    o = a[3 ] >> 28; a[4 ] += (sword32)o; a[3 ] -= (sword32)((word32)o << 28);
1740
    o = a[4 ] >> 28; a[5 ] += (sword32)o; a[4 ] -= (sword32)((word32)o << 28);
1741
    o = a[5 ] >> 28; a[6 ] += (sword32)o; a[5 ] -= (sword32)((word32)o << 28);
1742
    o = a[6 ] >> 28; a[7 ] += (sword32)o; a[6 ] -= (sword32)((word32)o << 28);
1743
    o = a[7 ] >> 28; a[8 ] += (sword32)o; a[7 ] -= (sword32)((word32)o << 28);
1744
    o = a[8 ] >> 28; a[9 ] += (sword32)o; a[8 ] -= (sword32)((word32)o << 28);
1745
    o = a[9 ] >> 28; a[10] += (sword32)o; a[9 ] -= (sword32)((word32)o << 28);
1746
    o = a[10] >> 28; a[11] += (sword32)o; a[10] -= (sword32)((word32)o << 28);
1747
    o = a[11] >> 28; a[12] += (sword32)o; a[11] -= (sword32)((word32)o << 28);
1748
    o = a[12] >> 28; a[13] += (sword32)o; a[12] -= (sword32)((word32)o << 28);
1749
    o = a[13] >> 28; a[14] += (sword32)o; a[13] -= (sword32)((word32)o << 28);
1750
    o = a[14] >> 28; a[15] += (sword32)o; a[14] -= (sword32)((word32)o << 28);
1751
    o = a[15] >> 28; a[0]  += (sword32)o;
1752
                     a[8]  += (sword32)o; a[15] -= (sword32)((word32)o << 28);
1753
}
1754
/* Multiply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
1755
 *
1756
 * r  [in]  Field element to hold result.
1757
 * a  [in]  Field element to multiply.
1758
 */
1759
void fe448_mul39081(sword32* r, const sword32* a)
1760
{
1761
    sword64 t;
1762
    sword32 o;
1763
    sword64 t0 = a[0] * (sword64)39081;
1764
    sword64 t1 = a[1] * (sword64)39081;
1765
    sword64 t2 = a[2] * (sword64)39081;
1766
    sword64 t3 = a[3] * (sword64)39081;
1767
    sword64 t4 = a[4] * (sword64)39081;
1768
    sword64 t5 = a[5] * (sword64)39081;
1769
    sword64 t6 = a[6] * (sword64)39081;
1770
    sword64 t7 = a[7] * (sword64)39081;
1771
    sword64 t8 = a[8] * (sword64)39081;
1772
    sword64 t9 = a[9] * (sword64)39081;
1773
    sword64 t10 = a[10] * (sword64)39081;
1774
    sword64 t11 = a[11] * (sword64)39081;
1775
    sword64 t12 = a[12] * (sword64)39081;
1776
    sword64 t13 = a[13] * (sword64)39081;
1777
    sword64 t14 = a[14] * (sword64)39081;
1778
    sword64 t15 = a[15] * (sword64)39081;
1779
    o = (sword32)(t0  >> 28); t1  += o;
1780
    t = (sword64)((word64)o << 28); t0  -= t;
1781
    o = (sword32)(t1  >> 28); t2  += o;
1782
    t = (sword64)((word64)o << 28); t1  -= t;
1783
    o = (sword32)(t2  >> 28); t3  += o;
1784
    t = (sword64)((word64)o << 28); t2  -= t;
1785
    o = (sword32)(t3  >> 28); t4  += o;
1786
    t = (sword64)((word64)o << 28); t3  -= t;
1787
    o = (sword32)(t4  >> 28); t5  += o;
1788
    t = (sword64)((word64)o << 28); t4  -= t;
1789
    o = (sword32)(t5  >> 28); t6  += o;
1790
    t = (sword64)((word64)o << 28); t5  -= t;
1791
    o = (sword32)(t6  >> 28); t7  += o;
1792
    t = (sword64)((word64)o << 28); t6  -= t;
1793
    o = (sword32)(t7  >> 28); t8  += o;
1794
    t = (sword64)((word64)o << 28); t7  -= t;
1795
    o = (sword32)(t8  >> 28); t9  += o;
1796
    t = (sword64)((word64)o << 28); t8  -= t;
1797
    o = (sword32)(t9  >> 28); t10 += o;
1798
    t = (sword64)((word64)o << 28); t9  -= t;
1799
    o = (sword32)(t10 >> 28); t11 += o;
1800
    t = (sword64)((word64)o << 28); t10 -= t;
1801
    o = (sword32)(t11 >> 28); t12 += o;
1802
    t = (sword64)((word64)o << 28); t11 -= t;
1803
    o = (sword32)(t12 >> 28); t13 += o;
1804
    t = (sword64)((word64)o << 28); t12 -= t;
1805
    o = (sword32)(t13 >> 28); t14 += o;
1806
    t = (sword64)((word64)o << 28); t13 -= t;
1807
    o = (sword32)(t14 >> 28); t15 += o;
1808
    t = (sword64)((word64)o << 28); t14 -= t;
1809
    o = (sword32)(t15 >> 28); t0  += o;
1810
                              t8  += o;
1811
    t = (sword64)((word64)o << 28); t15 -= t;
1812
1813
    /* Store */
1814
    r[0] = (sword32)t0;
1815
    r[1] = (sword32)t1;
1816
    r[2] = (sword32)t2;
1817
    r[3] = (sword32)t3;
1818
    r[4] = (sword32)t4;
1819
    r[5] = (sword32)t5;
1820
    r[6] = (sword32)t6;
1821
    r[7] = (sword32)t7;
1822
    r[8] = (sword32)t8;
1823
    r[9] = (sword32)t9;
1824
    r[10] = (sword32)t10;
1825
    r[11] = (sword32)t11;
1826
    r[12] = (sword32)t12;
1827
    r[13] = (sword32)t13;
1828
    r[14] = (sword32)t14;
1829
    r[15] = (sword32)t15;
1830
}
1831
1832
/* Multiply two field elements. r = a * b
1833
 *
1834
 * r  [in]  Field element to hold result.
1835
 * a  [in]  Field element to multiply.
1836
 * b  [in]  Field element to multiply.
1837
 */
1838
static WC_INLINE void fe448_mul_8(sword32* r, const sword32* a,
1839
    const sword32* b)
1840
{
1841
    sword64 t;
1842
    sword64 o;
1843
    sword64 t15;
1844
    sword64 t0   = (sword64)a[ 0] * b[ 0];
1845
    sword64 t1   = (sword64)a[ 0] * b[ 1];
1846
    sword64 t101 = (sword64)a[ 1] * b[ 0];
1847
    sword64 t2   = (sword64)a[ 0] * b[ 2];
1848
    sword64 t102 = (sword64)a[ 1] * b[ 1];
1849
    sword64 t202 = (sword64)a[ 2] * b[ 0];
1850
    sword64 t3   = (sword64)a[ 0] * b[ 3];
1851
    sword64 t103 = (sword64)a[ 1] * b[ 2];
1852
    sword64 t203 = (sword64)a[ 2] * b[ 1];
1853
    sword64 t303 = (sword64)a[ 3] * b[ 0];
1854
    sword64 t4   = (sword64)a[ 0] * b[ 4];
1855
    sword64 t104 = (sword64)a[ 1] * b[ 3];
1856
    sword64 t204 = (sword64)a[ 2] * b[ 2];
1857
    sword64 t304 = (sword64)a[ 3] * b[ 1];
1858
    sword64 t404 = (sword64)a[ 4] * b[ 0];
1859
    sword64 t5   = (sword64)a[ 0] * b[ 5];
1860
    sword64 t105 = (sword64)a[ 1] * b[ 4];
1861
    sword64 t205 = (sword64)a[ 2] * b[ 3];
1862
    sword64 t305 = (sword64)a[ 3] * b[ 2];
1863
    sword64 t405 = (sword64)a[ 4] * b[ 1];
1864
    sword64 t505 = (sword64)a[ 5] * b[ 0];
1865
    sword64 t6   = (sword64)a[ 0] * b[ 6];
1866
    sword64 t106 = (sword64)a[ 1] * b[ 5];
1867
    sword64 t206 = (sword64)a[ 2] * b[ 4];
1868
    sword64 t306 = (sword64)a[ 3] * b[ 3];
1869
    sword64 t406 = (sword64)a[ 4] * b[ 2];
1870
    sword64 t506 = (sword64)a[ 5] * b[ 1];
1871
    sword64 t606 = (sword64)a[ 6] * b[ 0];
1872
    sword64 t7   = (sword64)a[ 0] * b[ 7];
1873
    sword64 t107 = (sword64)a[ 1] * b[ 6];
1874
    sword64 t207 = (sword64)a[ 2] * b[ 5];
1875
    sword64 t307 = (sword64)a[ 3] * b[ 4];
1876
    sword64 t407 = (sword64)a[ 4] * b[ 3];
1877
    sword64 t507 = (sword64)a[ 5] * b[ 2];
1878
    sword64 t607 = (sword64)a[ 6] * b[ 1];
1879
    sword64 t707 = (sword64)a[ 7] * b[ 0];
1880
    sword64 t8   = (sword64)a[ 1] * b[ 7];
1881
    sword64 t108 = (sword64)a[ 2] * b[ 6];
1882
    sword64 t208 = (sword64)a[ 3] * b[ 5];
1883
    sword64 t308 = (sword64)a[ 4] * b[ 4];
1884
    sword64 t408 = (sword64)a[ 5] * b[ 3];
1885
    sword64 t508 = (sword64)a[ 6] * b[ 2];
1886
    sword64 t608 = (sword64)a[ 7] * b[ 1];
1887
    sword64 t9   = (sword64)a[ 2] * b[ 7];
1888
    sword64 t109 = (sword64)a[ 3] * b[ 6];
1889
    sword64 t209 = (sword64)a[ 4] * b[ 5];
1890
    sword64 t309 = (sword64)a[ 5] * b[ 4];
1891
    sword64 t409 = (sword64)a[ 6] * b[ 3];
1892
    sword64 t509 = (sword64)a[ 7] * b[ 2];
1893
    sword64 t10  = (sword64)a[ 3] * b[ 7];
1894
    sword64 t110 = (sword64)a[ 4] * b[ 6];
1895
    sword64 t210 = (sword64)a[ 5] * b[ 5];
1896
    sword64 t310 = (sword64)a[ 6] * b[ 4];
1897
    sword64 t410 = (sword64)a[ 7] * b[ 3];
1898
    sword64 t11  = (sword64)a[ 4] * b[ 7];
1899
    sword64 t111 = (sword64)a[ 5] * b[ 6];
1900
    sword64 t211 = (sword64)a[ 6] * b[ 5];
1901
    sword64 t311 = (sword64)a[ 7] * b[ 4];
1902
    sword64 t12  = (sword64)a[ 5] * b[ 7];
1903
    sword64 t112 = (sword64)a[ 6] * b[ 6];
1904
    sword64 t212 = (sword64)a[ 7] * b[ 5];
1905
    sword64 t13  = (sword64)a[ 6] * b[ 7];
1906
    sword64 t113 = (sword64)a[ 7] * b[ 6];
1907
    sword64 t14  = (sword64)a[ 7] * b[ 7];
1908
    t1  += t101;
1909
    t2  += t102; t2  += t202;
1910
    t3  += t103; t3  += t203; t3  += t303;
1911
    t4  += t104; t4  += t204; t4  += t304; t4  += t404;
1912
    t5  += t105; t5  += t205; t5  += t305; t5  += t405; t5  += t505;
1913
    t6  += t106; t6  += t206; t6  += t306; t6  += t406; t6  += t506;
1914
    t6  += t606;
1915
    t7  += t107; t7  += t207; t7  += t307; t7  += t407; t7  += t507;
1916
    t7  += t607;
1917
    t8  += t108; t8  += t208; t8  += t308; t8  += t408; t8  += t508;
1918
    t8  += t608;
1919
    t9  += t109; t9  += t209; t9  += t309; t9  += t409; t9  += t509;
1920
    t10 += t110; t10 += t210; t10 += t310; t10 += t410;
1921
    t11 += t111; t11 += t211; t11 += t311;
1922
    t12 += t112; t12 += t212;
1923
    t13 += t113;
1924
    o = t14 >> 28;
1925
    t15 = o;
1926
    t14 -= (sword64)((word64)o << 28);
1927
    o = (t0  >> 28); t1  += o;
1928
    t = (sword64)((word64)o << 28); t0  -= t;
1929
    o = (t1  >> 28); t2  += o;
1930
    t = (sword64)((word64)o << 28); t1  -= t;
1931
    o = (t2  >> 28); t3  += o;
1932
    t = (sword64)((word64)o << 28); t2  -= t;
1933
    o = (t3  >> 28); t4  += o;
1934
    t = (sword64)((word64)o << 28); t3  -= t;
1935
    o = (t4  >> 28); t5  += o;
1936
    t = (sword64)((word64)o << 28); t4  -= t;
1937
    o = (t5  >> 28); t6  += o;
1938
    t = (sword64)((word64)o << 28); t5  -= t;
1939
    o = (t6  >> 28); t7  += o;
1940
    t = (sword64)((word64)o << 28); t6  -= t;
1941
    o = (t707>> 28); t8  += o;
1942
    t = (sword64)((word64)o << 28); t707-= t; t7 += t707;
1943
    o = (t7  >> 28); t8  += o;
1944
    t = (sword64)((word64)o << 28); t7  -= t;
1945
    o = (t8  >> 28); t9  += o;
1946
    t = (sword64)((word64)o << 28); t8  -= t;
1947
    o = (t9  >> 28); t10 += o;
1948
    t = (sword64)((word64)o << 28); t9  -= t;
1949
    o = (t10 >> 28); t11 += o;
1950
    t = (sword64)((word64)o << 28); t10 -= t;
1951
    o = (t11 >> 28); t12 += o;
1952
    t = (sword64)((word64)o << 28); t11 -= t;
1953
    o = (t12 >> 28); t13 += o;
1954
    t = (sword64)((word64)o << 28); t12 -= t;
1955
    o = (t13 >> 28); t14 += o;
1956
    t = (sword64)((word64)o << 28); t13 -= t;
1957
    o = (t14 >> 28); t15 += o;
1958
    t = (sword64)((word64)o << 28); t14 -= t;
1959
    o = (t15 >> 28); t0  += o;
1960
                     t8  += o;
1961
    t = (sword64)((word64)o << 28); t15 -= t;
1962
1963
    /* Store */
1964
    r[0] = (sword32)t0;
1965
    r[1] = (sword32)t1;
1966
    r[2] = (sword32)t2;
1967
    r[3] = (sword32)t3;
1968
    r[4] = (sword32)t4;
1969
    r[5] = (sword32)t5;
1970
    r[6] = (sword32)t6;
1971
    r[7] = (sword32)t7;
1972
    r[8] = (sword32)t8;
1973
    r[9] = (sword32)t9;
1974
    r[10] = (sword32)t10;
1975
    r[11] = (sword32)t11;
1976
    r[12] = (sword32)t12;
1977
    r[13] = (sword32)t13;
1978
    r[14] = (sword32)t14;
1979
    r[15] = (sword32)t15;
1980
}
1981
1982
/* Multiply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
1983
 *
1984
 * r  [in]  Field element to hold result.
1985
 * a  [in]  Field element to multiply.
1986
 * b  [in]  Field element to multiply.
1987
 */
1988
void fe448_mul(sword32* r, const sword32* a, const sword32* b)
1989
{
1990
    sword32 r0[16];
1991
    sword32 r1[16];
1992
    sword32* a1 = r1;
1993
    sword32 b1[8];
1994
    sword32 r2[16];
1995
    a1[0] = a[0] + a[8];
1996
    a1[1] = a[1] + a[9];
1997
    a1[2] = a[2] + a[10];
1998
    a1[3] = a[3] + a[11];
1999
    a1[4] = a[4] + a[12];
2000
    a1[5] = a[5] + a[13];
2001
    a1[6] = a[6] + a[14];
2002
    a1[7] = a[7] + a[15];
2003
    b1[0] = b[0] + b[8];
2004
    b1[1] = b[1] + b[9];
2005
    b1[2] = b[2] + b[10];
2006
    b1[3] = b[3] + b[11];
2007
    b1[4] = b[4] + b[12];
2008
    b1[5] = b[5] + b[13];
2009
    b1[6] = b[6] + b[14];
2010
    b1[7] = b[7] + b[15];
2011
    fe448_mul_8(r2, a + 8, b + 8);
2012
    fe448_mul_8(r0, a, b);
2013
    fe448_mul_8(r1, a1, b1);
2014
    r[ 0] = r0[ 0] + r2[ 0] + r1[ 8] - r0[ 8];
2015
    r[ 1] = r0[ 1] + r2[ 1] + r1[ 9] - r0[ 9];
2016
    r[ 2] = r0[ 2] + r2[ 2] + r1[10] - r0[10];
2017
    r[ 3] = r0[ 3] + r2[ 3] + r1[11] - r0[11];
2018
    r[ 4] = r0[ 4] + r2[ 4] + r1[12] - r0[12];
2019
    r[ 5] = r0[ 5] + r2[ 5] + r1[13] - r0[13];
2020
    r[ 6] = r0[ 6] + r2[ 6] + r1[14] - r0[14];
2021
    r[ 7] = r0[ 7] + r2[ 7] + r1[15] - r0[15];
2022
    r[ 8] = r2[ 8]          + r1[ 0] - r0[ 0] + r1[ 8];
2023
    r[ 9] = r2[ 9]          + r1[ 1] - r0[ 1] + r1[ 9];
2024
    r[10] = r2[10]          + r1[ 2] - r0[ 2] + r1[10];
2025
    r[11] = r2[11]          + r1[ 3] - r0[ 3] + r1[11];
2026
    r[12] = r2[12]          + r1[ 4] - r0[ 4] + r1[12];
2027
    r[13] = r2[13]          + r1[ 5] - r0[ 5] + r1[13];
2028
    r[14] = r2[14]          + r1[ 6] - r0[ 6] + r1[14];
2029
    r[15] = r2[15]          + r1[ 7] - r0[ 7] + r1[15];
2030
}
2031
2032
/* Square a field element. r = a * a
2033
 *
2034
 * r  [in]  Field element to hold result.
2035
 * a  [in]  Field element to square.
2036
 */
2037
static WC_INLINE void fe448_sqr_8(sword32* r, const sword32* a)
2038
{
2039
    sword64 o;
2040
    sword64 t15;
2041
    sword64 t;
2042
    sword64 t0   =     (sword64)a[ 0] * a[ 0];
2043
    sword64 t1   = 2 * (sword64)a[ 0] * a[ 1];
2044
    sword64 t2   = 2 * (sword64)a[ 0] * a[ 2];
2045
    sword64 t102 =     (sword64)a[ 1] * a[ 1];
2046
    sword64 t3   = 2 * (sword64)a[ 0] * a[ 3];
2047
    sword64 t103 = 2 * (sword64)a[ 1] * a[ 2];
2048
    sword64 t4   = 2 * (sword64)a[ 0] * a[ 4];
2049
    sword64 t104 = 2 * (sword64)a[ 1] * a[ 3];
2050
    sword64 t204 =     (sword64)a[ 2] * a[ 2];
2051
    sword64 t5   = 2 * (sword64)a[ 0] * a[ 5];
2052
    sword64 t105 = 2 * (sword64)a[ 1] * a[ 4];
2053
    sword64 t205 = 2 * (sword64)a[ 2] * a[ 3];
2054
    sword64 t6   = 2 * (sword64)a[ 0] * a[ 6];
2055
    sword64 t106 = 2 * (sword64)a[ 1] * a[ 5];
2056
    sword64 t206 = 2 * (sword64)a[ 2] * a[ 4];
2057
    sword64 t306 =     (sword64)a[ 3] * a[ 3];
2058
    sword64 t7   = 2 * (sword64)a[ 0] * a[ 7];
2059
    sword64 t107 = 2 * (sword64)a[ 1] * a[ 6];
2060
    sword64 t207 = 2 * (sword64)a[ 2] * a[ 5];
2061
    sword64 t307 = 2 * (sword64)a[ 3] * a[ 4];
2062
    sword64 t8   = 2 * (sword64)a[ 1] * a[ 7];
2063
    sword64 t108 = 2 * (sword64)a[ 2] * a[ 6];
2064
    sword64 t208 = 2 * (sword64)a[ 3] * a[ 5];
2065
    sword64 t308 =     (sword64)a[ 4] * a[ 4];
2066
    sword64 t9   = 2 * (sword64)a[ 2] * a[ 7];
2067
    sword64 t109 = 2 * (sword64)a[ 3] * a[ 6];
2068
    sword64 t209 = 2 * (sword64)a[ 4] * a[ 5];
2069
    sword64 t10  = 2 * (sword64)a[ 3] * a[ 7];
2070
    sword64 t110 = 2 * (sword64)a[ 4] * a[ 6];
2071
    sword64 t210 =     (sword64)a[ 5] * a[ 5];
2072
    sword64 t11  = 2 * (sword64)a[ 4] * a[ 7];
2073
    sword64 t111 = 2 * (sword64)a[ 5] * a[ 6];
2074
    sword64 t12  = 2 * (sword64)a[ 5] * a[ 7];
2075
    sword64 t112 =     (sword64)a[ 6] * a[ 6];
2076
    sword64 t13  = 2 * (sword64)a[ 6] * a[ 7];
2077
    sword64 t14  =     (sword64)a[ 7] * a[ 7];
2078
    t2  += t102;
2079
    t3  += t103;
2080
    t4  += t104; t4  += t204;
2081
    t5  += t105; t5  += t205;
2082
    t6  += t106; t6  += t206; t6  += t306;
2083
    t7  += t107; t7  += t207;
2084
    t8  += t108; t8  += t208; t8  += t308;
2085
    t9  += t109; t9  += t209;
2086
    t10 += t110; t10 += t210;
2087
    t11 += t111;
2088
    t12 += t112;
2089
    o = t14 >> 28;
2090
    t15 = o;
2091
    t14 -= (sword64)((word64)o << 28);
2092
    o = (t0  >> 28); t1  += o;
2093
    t = (sword64)((word64)o << 28); t0  -= t;
2094
    o = (t1  >> 28); t2  += o;
2095
    t = (sword64)((word64)o << 28); t1  -= t;
2096
    o = (t2  >> 28); t3  += o;
2097
    t = (sword64)((word64)o << 28); t2  -= t;
2098
    o = (t3  >> 28); t4  += o;
2099
    t = (sword64)((word64)o << 28); t3  -= t;
2100
    o = (t4  >> 28); t5  += o;
2101
    t = (sword64)((word64)o << 28); t4  -= t;
2102
    o = (t5  >> 28); t6  += o;
2103
    t = (sword64)((word64)o << 28); t5  -= t;
2104
    o = (t6  >> 28); t7  += o;
2105
    t = (sword64)((word64)o << 28); t6  -= t;
2106
    o = (t307>> 28); t8  += o;
2107
    t = (sword64)((word64)o << 28); t307-= t; t7 += t307;
2108
    o = (t7  >> 28); t8  += o;
2109
    t = (sword64)((word64)o << 28); t7  -= t;
2110
    o = (t8  >> 28); t9  += o;
2111
    t = (sword64)((word64)o << 28); t8  -= t;
2112
    o = (t9  >> 28); t10 += o;
2113
    t = (sword64)((word64)o << 28); t9  -= t;
2114
    o = (t10 >> 28); t11 += o;
2115
    t = (sword64)((word64)o << 28); t10 -= t;
2116
    o = (t11 >> 28); t12 += o;
2117
    t = (sword64)((word64)o << 28); t11 -= t;
2118
    o = (t12 >> 28); t13 += o;
2119
    t = (sword64)((word64)o << 28); t12 -= t;
2120
    o = (t13 >> 28); t14 += o;
2121
    t = (sword64)((word64)o << 28); t13 -= t;
2122
    o = (t14 >> 28); t15 += o;
2123
    t = (sword64)((word64)o << 28); t14 -= t;
2124
    o = (t15 >> 28); t0  += o;
2125
                     t8  += o;
2126
    t = (sword64)((word64)o << 28); t15 -= t;
2127
2128
    /* Store */
2129
    r[0] = (sword32)t0;
2130
    r[1] = (sword32)t1;
2131
    r[2] = (sword32)t2;
2132
    r[3] = (sword32)t3;
2133
    r[4] = (sword32)t4;
2134
    r[5] = (sword32)t5;
2135
    r[6] = (sword32)t6;
2136
    r[7] = (sword32)t7;
2137
    r[8] = (sword32)t8;
2138
    r[9] = (sword32)t9;
2139
    r[10] = (sword32)t10;
2140
    r[11] = (sword32)t11;
2141
    r[12] = (sword32)t12;
2142
    r[13] = (sword32)t13;
2143
    r[14] = (sword32)t14;
2144
    r[15] = (sword32)t15;
2145
}
2146
2147
/* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
2148
 *
2149
 * r  [in]  Field element to hold result.
2150
 * a  [in]  Field element to square.
2151
 */
2152
void fe448_sqr(sword32* r, const sword32* a)
2153
{
2154
    sword32 r0[16];
2155
    sword32 r1[16];
2156
    sword32* a1 = r1;
2157
    sword32 r2[16];
2158
    a1[0] = a[0] + a[8];
2159
    a1[1] = a[1] + a[9];
2160
    a1[2] = a[2] + a[10];
2161
    a1[3] = a[3] + a[11];
2162
    a1[4] = a[4] + a[12];
2163
    a1[5] = a[5] + a[13];
2164
    a1[6] = a[6] + a[14];
2165
    a1[7] = a[7] + a[15];
2166
    fe448_sqr_8(r2, a + 8);
2167
    fe448_sqr_8(r0, a);
2168
    fe448_sqr_8(r1, a1);
2169
    r[ 0] = r0[ 0] + r2[ 0] + r1[ 8] - r0[ 8];
2170
    r[ 1] = r0[ 1] + r2[ 1] + r1[ 9] - r0[ 9];
2171
    r[ 2] = r0[ 2] + r2[ 2] + r1[10] - r0[10];
2172
    r[ 3] = r0[ 3] + r2[ 3] + r1[11] - r0[11];
2173
    r[ 4] = r0[ 4] + r2[ 4] + r1[12] - r0[12];
2174
    r[ 5] = r0[ 5] + r2[ 5] + r1[13] - r0[13];
2175
    r[ 6] = r0[ 6] + r2[ 6] + r1[14] - r0[14];
2176
    r[ 7] = r0[ 7] + r2[ 7] + r1[15] - r0[15];
2177
    r[ 8] = r2[ 8]          + r1[ 0] - r0[ 0] + r1[ 8];
2178
    r[ 9] = r2[ 9]          + r1[ 1] - r0[ 1] + r1[ 9];
2179
    r[10] = r2[10]          + r1[ 2] - r0[ 2] + r1[10];
2180
    r[11] = r2[11]          + r1[ 3] - r0[ 3] + r1[11];
2181
    r[12] = r2[12]          + r1[ 4] - r0[ 4] + r1[12];
2182
    r[13] = r2[13]          + r1[ 5] - r0[ 5] + r1[13];
2183
    r[14] = r2[14]          + r1[ 6] - r0[ 6] + r1[14];
2184
    r[15] = r2[15]          + r1[ 7] - r0[ 7] + r1[15];
2185
}
2186
2187
/* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
2188
 * Constant time implementation - using Fermat's little theorem:
2189
 *   a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
2190
 * For curve448: p - 2 = 2^448 - 2^224 - 3
2191
 *
2192
 * r  [in]  Field element to hold result.
2193
 * a  [in]  Field element to invert.
2194
 */
2195
void fe448_invert(sword32* r, const sword32* a)
2196
{
2197
    sword32 t1[16];
2198
    sword32 t2[16];
2199
    sword32 t3[16];
2200
    sword32 t4[16];
2201
    int i;
2202
2203
    fe448_sqr(t1, a);
2204
    /* t1 = 2 */
2205
    fe448_mul(t1, t1, a);
2206
    /* t1 = 3 */
2207
    fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
2208
    /* t2 = c */
2209
    fe448_mul(t3, t2, a);
2210
    /* t3 = d */
2211
    fe448_mul(t1, t2, t1);
2212
    /* t1 = f */
2213
    fe448_sqr(t2, t1);
2214
    /* t2 = 1e */
2215
    fe448_mul(t4, t2, a);
2216
    /* t4 = 1f */
2217
    fe448_sqr(t2, t4); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
2218
    /* t2 = 3e0 */
2219
    fe448_mul(t1, t2, t4);
2220
    /* t1 = 3ff */
2221
    fe448_sqr(t2, t1); for (i = 1; i < 10; ++i) fe448_sqr(t2, t2);
2222
    /* t2 = ffc00 */
2223
    fe448_mul(t1, t2, t1);
2224
    /* t1 = fffff */
2225
    fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
2226
    /* t2 = 1ffffe0 */
2227
    fe448_mul(t1, t2, t4);
2228
    /* t1 = 1ffffff */
2229
    fe448_sqr(t2, t1); for (i = 1; i < 25; ++i) fe448_sqr(t2, t2);
2230
    /* t2 = 3fffffe000000 */
2231
    fe448_mul(t1, t2, t1);
2232
    /* t1 = 3ffffffffffff */
2233
    fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
2234
    /* t2 = 7fffffffffffe0 */
2235
    fe448_mul(t1, t2, t4);
2236
    /* t1 = 7fffffffffffff */
2237
    fe448_sqr(t2, t1); for (i = 1; i < 55; ++i) fe448_sqr(t2, t2);
2238
    /* t2 = 3fffffffffffff80000000000000 */
2239
    fe448_mul(t1, t2, t1);
2240
    /* t1 = 3fffffffffffffffffffffffffff */
2241
    fe448_sqr(t2, t1); for (i = 1; i < 110; ++i) fe448_sqr(t2, t2);
2242
    /* t2 = fffffffffffffffffffffffffffc000000000000000000000000000 */
2243
    fe448_mul(t1, t2, t1);
2244
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff */
2245
    fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
2246
    /* t2 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff0 */
2247
    fe448_mul(t3, t3, t2);
2248
    /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
2249
    fe448_mul(t1, t3, a);
2250
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
2251
    fe448_sqr(t1, t1); for (i = 1; i < 224; ++i) fe448_sqr(t1, t1);
2252
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000000000000000000000000000000000 */
2253
    fe448_mul(r, t3, t1);
2254
    /* r = fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
2255
}
2256
2257
/* Scalar multiply the point by a number. r = n.a
2258
 * Uses Montgomery ladder and only requires the x-ordinate.
2259
 *
2260
 * r  [in]  Field element to hold result.
2261
 * n  [in]  Scalar as an array of bytes.
2262
 * a  [in]  Point to multiply - x-ordinate only.
2263
 */
2264
int curve448(byte* r, const byte* n, const byte* a)
2265
{
2266
    sword32 x1[16];
2267
    sword32 x2[16];
2268
    sword32 z2[16];
2269
    sword32 x3[16];
2270
    sword32 z3[16];
2271
    sword32 t0[16];
2272
    sword32 t1[16];
2273
    int i;
2274
    unsigned int swap;
2275
2276
    fe448_from_bytes(x1, a);
2277
    fe448_1(x2);
2278
    fe448_0(z2);
2279
    fe448_copy(x3, x1);
2280
    fe448_1(z3);
2281
2282
    swap = 0;
2283
    for (i = 447; i >= 0; --i) {
2284
        unsigned int b = (n[i >> 3] >> (i & 7)) & 1;
2285
        swap ^= b;
2286
        fe448_cswap(x2, x3, (int)swap);
2287
        fe448_cswap(z2, z3, (int)swap);
2288
        swap = b;
2289
2290
        /* Montgomery Ladder - double and add */
2291
        fe448_add(t0, x2, z2);
2292
        fe448_reduce(t0);
2293
        fe448_add(t1, x3, z3);
2294
        fe448_reduce(t1);
2295
        fe448_sub(x2, x2, z2);
2296
        fe448_sub(x3, x3, z3);
2297
        fe448_mul(t1, t1, x2);
2298
        fe448_mul(z3, x3, t0);
2299
        fe448_sqr(t0, t0);
2300
        fe448_sqr(x2, x2);
2301
        fe448_add(x3, z3, t1);
2302
        fe448_reduce(x3);
2303
        fe448_sqr(x3, x3);
2304
        fe448_sub(z3, z3, t1);
2305
        fe448_sqr(z3, z3);
2306
        fe448_mul(z3, z3, x1);
2307
        fe448_sub(t1, t0, x2);
2308
        fe448_mul(x2, t0, x2);
2309
        fe448_mul39081(z2, t1);
2310
        fe448_add(z2, t0, z2);
2311
        fe448_mul(z2, z2, t1);
2312
    }
2313
    /* Last two bits are 0 - no final swap check required. */
2314
2315
    fe448_invert(z2, z2);
2316
    fe448_mul(x2, x2, z2);
2317
    fe448_to_bytes(r, x2);
2318
2319
    return 0;
2320
}
2321
2322
#ifdef HAVE_ED448
2323
/* Check whether field element is not 0.
2324
 * Must convert to a normalized form before checking.
2325
 *
2326
 * a  [in]  Field element.
2327
 * returns 0 when zero, and any other value otherwise.
2328
 */
2329
int fe448_isnonzero(const sword32* a)
2330
{
2331
    byte b[56];
2332
    int i;
2333
    byte c = 0;
2334
    fe448_to_bytes(b, a);
2335
    for (i = 0; i < 56; i++)
2336
        c |= b[i];
2337
    return c;
2338
}
2339
2340
/* Check whether field element is negative.
2341
 * Must convert to a normalized form before checking.
2342
 *
2343
 * a  [in]  Field element.
2344
 * returns 1 when negative, and 0 otherwise.
2345
 */
2346
int fe448_isnegative(const sword32* a)
2347
{
2348
    byte b[56];
2349
    fe448_to_bytes(b, a);
2350
    return b[0] & 1;
2351
}
2352
2353
/* Negates the field element. r = -a
2354
 *
2355
 * r  [in]  Field element to hold result.
2356
 * a  [in]  Field element.
2357
 */
2358
void fe448_neg(sword32* r, const sword32* a)
2359
{
2360
    r[0] = -a[0];
2361
    r[1] = -a[1];
2362
    r[2] = -a[2];
2363
    r[3] = -a[3];
2364
    r[4] = -a[4];
2365
    r[5] = -a[5];
2366
    r[6] = -a[6];
2367
    r[7] = -a[7];
2368
    r[8] = -a[8];
2369
    r[9] = -a[9];
2370
    r[10] = -a[10];
2371
    r[11] = -a[11];
2372
    r[12] = -a[12];
2373
    r[13] = -a[13];
2374
    r[14] = -a[14];
2375
    r[15] = -a[15];
2376
}
2377
2378
/* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
2379
 * Used for calculating y-ordinate from x-ordinate for Ed448.
2380
 *
2381
 * r  [in]  Field element to hold result.
2382
 * a  [in]  Field element to exponentiate.
2383
 */
2384
void fe448_pow_2_446_222_1(sword32* r, const sword32* a)
2385
{
2386
    sword32 t1[16];
2387
    sword32 t2[16];
2388
    sword32 t3[16];
2389
    sword32 t4[16];
2390
    sword32 t5[16];
2391
    int i;
2392
2393
    fe448_sqr(t3, a);
2394
    /* t3 = 2 */
2395
    fe448_mul(t1, t3, a);
2396
    /* t1 = 3 */
2397
    fe448_sqr(t5, t1);
2398
    /* t5 = 6 */
2399
    fe448_mul(t5, t5, a);
2400
    /* t5 = 7 */
2401
    fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
2402
    /* t2 = c */
2403
    fe448_mul(t3, t2, t3);
2404
    /* t3 = e */
2405
    fe448_mul(t1, t2, t1);
2406
    /* t1 = f */
2407
    fe448_sqr(t2, t1); for (i = 1; i < 3; ++i) fe448_sqr(t2, t2);
2408
    /* t2 = 78 */
2409
    fe448_mul(t5, t2, t5);
2410
    /* t5 = 7f */
2411
    fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
2412
    /* t2 = f0 */
2413
    fe448_mul(t1, t2, t1);
2414
    /* t1 = ff */
2415
    fe448_mul(t3, t3, t2);
2416
    /* t3 = fe */
2417
    fe448_sqr(t2, t1); for (i = 1; i < 7; ++i) fe448_sqr(t2, t2);
2418
    /* t2 = 7f80 */
2419
    fe448_mul(t5, t2, t5);
2420
    /* t5 = 7fff */
2421
    fe448_sqr(t2, t1); for (i = 1; i < 8; ++i) fe448_sqr(t2, t2);
2422
    /* t2 = ff00 */
2423
    fe448_mul(t1, t2, t1);
2424
    /* t1 = ffff */
2425
    fe448_mul(t3, t3, t2);
2426
    /* t3 = fffe */
2427
    fe448_sqr(t2, t5); for (i = 1; i < 15; ++i) fe448_sqr(t2, t2);
2428
    /* t2 = 3fff8000 */
2429
    fe448_mul(t5, t2, t5);
2430
    /* t5 = 3fffffff */
2431
    fe448_sqr(t2, t1); for (i = 1; i < 16; ++i) fe448_sqr(t2, t2);
2432
    /* t2 = ffff0000 */
2433
    fe448_mul(t1, t2, t1);
2434
    /* t1 = ffffffff */
2435
    fe448_mul(t3, t3, t2);
2436
    /* t3 = fffffffe */
2437
    fe448_sqr(t2, t1); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
2438
    /* t2 = ffffffff00000000 */
2439
    fe448_mul(t2, t2, t1);
2440
    /* t2 = ffffffffffffffff */
2441
    fe448_sqr(t1, t2); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
2442
    /* t1 = ffffffffffffffff0000000000000000 */
2443
    fe448_mul(t1, t1, t2);
2444
    /* t1 = ffffffffffffffffffffffffffffffff */
2445
    fe448_sqr(t1, t1); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
2446
    /* t1 = ffffffffffffffffffffffffffffffff0000000000000000 */
2447
    fe448_mul(t4, t1, t2);
2448
    /* t4 = ffffffffffffffffffffffffffffffffffffffffffffffff */
2449
    fe448_sqr(t2, t4); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
2450
    /* t2 = ffffffffffffffffffffffffffffffffffffffffffffffff00000000 */
2451
    fe448_mul(t3, t3, t2);
2452
    /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
2453
    fe448_sqr(t1, t3); for (i = 1; i < 192; ++i) fe448_sqr(t1, t1);
2454
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe000000000000000000000000000000000000000000000000 */
2455
    fe448_mul(t1, t1, t4);
2456
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffff */
2457
    fe448_sqr(t1, t1); for (i = 1; i < 30; ++i) fe448_sqr(t1, t1);
2458
    /* t1 = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffc0000000 */
2459
    fe448_mul(r, t5, t1);
2460
    /* r = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff */
2461
}
2462
2463
/* Constant time, conditional move of b into a.
2464
 * a is not changed if the condition is 0.
2465
 *
2466
 * a  A field element.
2467
 * b  A field element.
2468
 * c  If 1 then copy and if 0 then don't copy.
2469
 */
2470
void fe448_cmov(sword32* a, const sword32* b, int c)
2471
{
2472
    sword32 m = -(sword32)c;
2473
    sword32 t0 = m & (a[0] ^ b[0]);
2474
    sword32 t1 = m & (a[1] ^ b[1]);
2475
    sword32 t2 = m & (a[2] ^ b[2]);
2476
    sword32 t3 = m & (a[3] ^ b[3]);
2477
    sword32 t4 = m & (a[4] ^ b[4]);
2478
    sword32 t5 = m & (a[5] ^ b[5]);
2479
    sword32 t6 = m & (a[6] ^ b[6]);
2480
    sword32 t7 = m & (a[7] ^ b[7]);
2481
    sword32 t8 = m & (a[8] ^ b[8]);
2482
    sword32 t9 = m & (a[9] ^ b[9]);
2483
    sword32 t10 = m & (a[10] ^ b[10]);
2484
    sword32 t11 = m & (a[11] ^ b[11]);
2485
    sword32 t12 = m & (a[12] ^ b[12]);
2486
    sword32 t13 = m & (a[13] ^ b[13]);
2487
    sword32 t14 = m & (a[14] ^ b[14]);
2488
    sword32 t15 = m & (a[15] ^ b[15]);
2489
2490
    a[0] ^= t0;
2491
    a[1] ^= t1;
2492
    a[2] ^= t2;
2493
    a[3] ^= t3;
2494
    a[4] ^= t4;
2495
    a[5] ^= t5;
2496
    a[6] ^= t6;
2497
    a[7] ^= t7;
2498
    a[8] ^= t8;
2499
    a[9] ^= t9;
2500
    a[10] ^= t10;
2501
    a[11] ^= t11;
2502
    a[12] ^= t12;
2503
    a[13] ^= t13;
2504
    a[14] ^= t14;
2505
    a[15] ^= t15;
2506
}
2507
2508
#endif /* HAVE_ED448 */
2509
#endif
2510
2511
#endif /* HAVE_CURVE448 || HAVE_ED448 */