Coverage Report

Created: 2022-08-24 06:28

/src/wolfssl-disable-fastmath/wolfcrypt/src/fe_448.c
Line
Count
Source
1
/* fe_448.c
2
 *
3
 * Copyright (C) 2006-2022 wolfSSL Inc.
4
 *
5
 * This file is part of wolfSSL.
6
 *
7
 * wolfSSL is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 2 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * wolfSSL is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20
 */
21
22
/* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work.
23
 * Small implementation based on Daniel Beer's curve25519 public domain work.
24
 * Reworked for curve448 by Sean Parkinson.
25
 */
26
27
#ifdef HAVE_CONFIG_H
28
    #include <config.h>
29
#endif
30
31
#include <wolfssl/wolfcrypt/settings.h>
32
33
#if defined(HAVE_CURVE448) || defined(HAVE_ED448)
34
35
#include <wolfssl/wolfcrypt/fe_448.h>
36
37
#ifdef NO_INLINE
38
    #include <wolfssl/wolfcrypt/misc.h>
39
#else
40
    #define WOLFSSL_MISC_INCLUDED
41
    #include <wolfcrypt/src/misc.c>
42
#endif
43
44
#if defined(CURVE448_SMALL) || defined(ED448_SMALL)
45
46
/* Initialize the field element operations.
47
 */
48
void fe448_init(void)
49
{
50
}
51
52
/* Normalize the field element.
53
 * Ensure result is in range: 0..2^448-2^224-2
54
 *
55
 * a  [in]  Field element in range 0..2^448-1.
56
 */
57
void fe448_norm(word8* a)
58
{
59
    int i;
60
    sword16 c = 0;
61
    sword16 o = 0;
62
63
    for (i = 0; i < 56; i++) {
64
        c += a[i];
65
        if ((i == 0) || (i == 28))
66
            c += 1;
67
        c >>= 8;
68
    }
69
70
    for (i = 0; i < 56; i++) {
71
        if ((i == 0) || (i == 28)) o += c;
72
        o += a[i];
73
        a[i] = (word8)o;
74
        o >>= 8;
75
    }
76
}
77
78
/* Copy one field element into another: d = a.
79
 *
80
 * d  [in]  Destination field element.
81
 * a  [in]  Source field element.
82
 */
83
void fe448_copy(word8* d, const word8* a)
84
{
85
    int i;
86
    for (i = 0; i < 56; i++) {
87
         d[i] = a[i];
88
    }
89
}
90
91
/* Conditionally swap the elements.
92
 * Constant time implementation.
93
 *
94
 * a  [in]  First field element.
95
 * b  [in]  Second field element.
96
 * c  [in]  Swap when 1. Valid values: 0, 1.
97
 */
98
static void fe448_cswap(word8* a, word8* b, int c)
99
{
100
    int i;
101
    word8 mask = -(word8)c;
102
    word8 t[56];
103
104
    for (i = 0; i < 56; i++)
105
        t[i] = (a[i] ^ b[i]) & mask;
106
    for (i = 0; i < 56; i++)
107
        a[i] ^= t[i];
108
    for (i = 0; i < 56; i++)
109
        b[i] ^= t[i];
110
}
111
112
/* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
113
 *
114
 * r  [in]  Field element to hold sum.
115
 * a  [in]  Field element to add.
116
 * b  [in]  Field element to add.
117
 */
118
void fe448_add(word8* r, const word8* a, const word8* b)
119
{
120
    int i;
121
    sword16 c = 0;
122
    sword16 o = 0;
123
124
    for (i = 0; i < 56; i++) {
125
        c += a[i];
126
        c += b[i];
127
        r[i] = (word8)c;
128
        c >>= 8;
129
    }
130
131
    for (i = 0; i < 56; i++) {
132
        if ((i == 0) || (i == 28)) o += c;
133
        o += r[i];
134
        r[i] = (word8)o;
135
        o >>= 8;
136
    }
137
}
138
139
/* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
140
 *
141
 * r  [in]  Field element to hold difference.
142
 * a  [in]  Field element to subtract from.
143
 * b  [in]  Field element to subtract.
144
 */
145
void fe448_sub(word8* r, const word8* a, const word8* b)
146
{
147
    int i;
148
    sword16 c = 0;
149
    sword16 o = 0;
150
151
    for (i = 0; i < 56; i++) {
152
        if (i == 28)
153
            c += 0x1fc;
154
        else
155
            c += 0x1fe;
156
        c += a[i];
157
        c -= b[i];
158
        r[i] = (word8)c;
159
        c >>= 8;
160
    }
161
162
    for (i = 0; i < 56; i++) {
163
        if ((i == 0) || (i == 28)) o += c;
164
        o += r[i];
165
        r[i] = (word8)o;
166
        o >>= 8;
167
    }
168
}
169
170
/* Mulitply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
171
 *
172
 * r  [in]  Field element to hold result.
173
 * a  [in]  Field element to multiply.
174
 */
175
void fe448_mul39081(word8* r, const word8* a)
176
{
177
    int i;
178
    sword32 c = 0;
179
    sword32 o = 0;
180
181
    for (i = 0; i < 56; i++) {
182
        c += a[i] * (sword32)39081;
183
        r[i] = (word8)c;
184
        c >>= 8;
185
    }
186
187
    for (i = 0; i < 56; i++) {
188
        if ((i == 0) || (i == 28)) o += c;
189
        o += r[i];
190
        r[i] = (word8)o;
191
        o >>= 8;
192
    }
193
}
194
195
/* Mulitply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
196
 *
197
 * r  [in]  Field element to hold result.
198
 * a  [in]  Field element to multiply.
199
 * b  [in]  Field element to multiply.
200
 */
201
void fe448_mul(word8* r, const word8* a, const word8* b)
202
{
203
    int i, k;
204
    sword32 c = 0;
205
    sword16 o = 0, cc = 0;
206
    word8 t[112];
207
208
    for (k = 0; k < 56; k++) {
209
        i = 0;
210
        for (; i <= k; i++) {
211
            c += (sword32)a[i] * b[k - i];
212
        }
213
        t[k] = (word8)c;
214
        c >>= 8;
215
    }
216
    for (; k < 111; k++) {
217
        i = k - 55;
218
        for (; i < 56; i++) {
219
            c += (sword32)a[i] * b[k - i];
220
        }
221
        t[k] = (word8)c;
222
        c >>= 8;
223
    }
224
    t[k] = (word8)c;
225
226
    for (i = 0; i < 28; i++) {
227
        o += t[i];
228
        o += t[i + 56];
229
        o += t[i + 84];
230
        r[i] = (word8)o;
231
        o >>= 8;
232
    }
233
    for (i = 28; i < 56; i++) {
234
        o += t[i];
235
        o += t[i + 56];
236
        o += t[i + 28];
237
        o += t[i + 56];
238
        r[i] = (word8)o;
239
        o >>= 8;
240
    }
241
    for (i = 0; i < 56; i++) {
242
        if ((i == 0) || (i == 28)) cc += o;
243
        cc += r[i];
244
        r[i] = (word8)cc;
245
        cc >>= 8;
246
    }
247
}
248
249
/* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
250
 *
251
 * r  [in]  Field element to hold result.
252
 * a  [in]  Field element to square.
253
 */
254
void fe448_sqr(word8* r, const word8* a)
255
{
256
    int i, k;
257
    sword32 c = 0;
258
    sword32 p;
259
    sword16 o = 0, cc = 0;
260
    word8 t[112];
261
262
    for (k = 0; k < 56; k++) {
263
        i = 0;
264
        for (; i <= k; i++) {
265
            if (k - i < i)
266
                break;
267
            p = (sword32)a[i] * a[k - i];
268
            if (k - i != i)
269
                p *= 2;
270
            c += p;
271
        }
272
        t[k] = (word8)c;
273
        c >>= 8;
274
    }
275
    for (; k < 111; k++) {
276
         i = k - 55;
277
        for (; i < 56; i++) {
278
            if (k - i < i)
279
                break;
280
            p = (sword32)a[i] * a[k - i];
281
            if (k - i != i)
282
                p *= 2;
283
            c += p;
284
        }
285
        t[k] = (word8)c;
286
        c >>= 8;
287
    }
288
    t[k] = (word8)c;
289
290
    for (i = 0; i < 28; i++) {
291
        o += t[i];
292
        o += t[i + 56];
293
        o += t[i + 84];
294
        r[i] = (word8)o;
295
        o >>= 8;
296
    }
297
    for (i = 28; i < 56; i++) {
298
        o += t[i];
299
        o += t[i + 56];
300
        o += t[i + 28];
301
        o += t[i + 56];
302
        r[i] = (word8)o;
303
        o >>= 8;
304
    }
305
    for (i = 0; i < 56; i++) {
306
        if ((i == 0) || (i == 28)) cc += o;
307
        cc += r[i];
308
        r[i] = (word8)cc;
309
        cc >>= 8;
310
    }
311
    fe448_norm(r);
312
}
313
314
/* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
315
 * Constant time implementation - using Fermat's little theorem:
316
 *   a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
317
 * For curve448: p - 2 = 2^448 - 2^224 - 3
318
 *
319
 * r  [in]  Field element to hold result.
320
 * a  [in]  Field element to invert.
321
 */
322
void fe448_invert(word8* r, const word8* a)
323
{
324
    int i;
325
    word8 t[56];
326
327
    fe448_sqr(t, a);
328
    fe448_mul(t, t, a);
329
    for (i = 0; i < 221; i++) {
330
        fe448_sqr(t, t);
331
        fe448_mul(t, t, a);
332
    }
333
    fe448_sqr(t, t);
334
    for (i = 0; i < 222; i++) {
335
        fe448_sqr(t, t);
336
        fe448_mul(t, t, a);
337
    }
338
    fe448_sqr(t, t);
339
    fe448_sqr(t, t);
340
    fe448_mul(r, t, a);
341
}
342
343
/* Scalar multiply the point by a number. r = n.a
344
 * Uses Montgomery ladder and only requires the x-ordinate.
345
 *
346
 * r  [in]  Field element to hold result.
347
 * n  [in]  Scalar as an array of bytes.
348
 * a  [in]  Point to multiply - x-ordinate only.
349
 */
350
int curve448(byte* r, const byte* n, const byte* a)
351
{
352
    word8 x1[56];
353
    word8 x2[56] = {1};
354
    word8 z2[56] = {0};
355
    word8 x3[56];
356
    word8 z3[56] = {1};
357
    word8 t0[56];
358
    word8 t1[56];
359
    int i;
360
    unsigned int swap;
361
    unsigned int b;
362
363
    fe448_copy(x1, a);
364
    fe448_copy(x3, a);
365
366
    swap = 0;
367
    for (i = 447; i >= 0; --i) {
368
        b = (n[i >> 3] >> (i & 7)) & 1;
369
        swap ^= b;
370
        fe448_cswap(x2, x3, swap);
371
        fe448_cswap(z2, z3, swap);
372
        swap = b;
373
374
        /* Montgomery Ladder - double and add */
375
        fe448_add(t0, x2, z2);
376
        fe448_add(t1, x3, z3);
377
        fe448_sub(x2, x2, z2);
378
        fe448_sub(x3, x3, z3);
379
        fe448_mul(t1, t1, x2);
380
        fe448_mul(z3, x3, t0);
381
        fe448_sqr(t0, t0);
382
        fe448_sqr(x2, x2);
383
        fe448_add(x3, z3, t1);
384
        fe448_sqr(x3, x3);
385
        fe448_sub(z3, z3, t1);
386
        fe448_sqr(z3, z3);
387
        fe448_mul(z3, z3, x1);
388
        fe448_sub(t1, t0, x2);
389
        fe448_mul(x2, t0, x2);
390
        fe448_mul39081(z2, t1);
391
        fe448_add(z2, t0, z2);
392
        fe448_mul(z2, z2, t1);
393
    }
394
    fe448_cswap(x2, x3, swap);
395
    fe448_cswap(z2, z3, swap);
396
397
    fe448_invert(z2, z2);
398
    fe448_mul(r, x2, z2);
399
    fe448_norm(r);
400
401
    return 0;
402
}
403
404
#ifdef HAVE_ED448
405
/* Check whether field element is not 0.
406
 * Field element must have been normalized before call.
407
 *
408
 * a  [in]  Field element.
409
 * returns 0 when zero, and any other value otherwise.
410
 */
411
int fe448_isnonzero(const word8* a)
412
{
413
    int i;
414
    byte c = 0;
415
    for (i = 0; i < 56; i++)
416
        c |= a[i];
417
    return c;
418
}
419
420
/* Negates the field element. r = -a mod (2^448 - 2^224 - 1)
421
 * Add 0x200 to each element and subtract 2 from next.
422
 * Top element overflow handled by subtracting 2 from index 0 and 28.
423
 *
424
 * r  [in]  Field element to hold result.
425
 * a  [in]  Field element.
426
 */
427
void fe448_neg(word8* r, const word8* a)
428
{
429
    int i;
430
    sword16 c = 0;
431
    sword16 o = 0;
432
433
    for (i = 0; i < 56; i++) {
434
        if (i == 28)
435
            c += 0x1fc;
436
        else
437
            c += 0x1fe;
438
        c -= a[i];
439
        r[i] = (word8)c;
440
        c >>= 8;
441
    }
442
443
    for (i = 0; i < 56; i++) {
444
        if ((i == 0) || (i == 28)) o += c;
445
        o += r[i];
446
        r[i] = (word8)o;
447
        o >>= 8;
448
    }
449
}
450
451
/* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
452
 * Used for calcualting y-ordinate from x-ordinate for Ed448.
453
 *
454
 * r  [in]  Field element to hold result.
455
 * a  [in]  Field element to exponentiate.
456
 */
457
void fe448_pow_2_446_222_1(word8* r, const word8* a)
458
{
459
    int i;
460
    word8 t[56];
461
462
    fe448_sqr(t, a);
463
    fe448_mul(t, t, a);
464
    for (i = 0; i < 221; i++) {
465
        fe448_sqr(t, t);
466
        fe448_mul(t, t, a);
467
    }
468
    fe448_sqr(t, t);
469
    for (i = 0; i < 221; i++) {
470
        fe448_sqr(t, t);
471
        fe448_mul(t, t, a);
472
    }
473
    fe448_sqr(t, t);
474
    fe448_mul(r, t, a);
475
}
476
477
/* Constant time, conditional move of b into a.
478
 * a is not changed if the condition is 0.
479
 *
480
 * a  A field element.
481
 * b  A field element.
482
 * c  If 1 then copy and if 0 then don't copy.
483
 */
484
void fe448_cmov(word8* a, const word8* b, int c)
485
{
486
    int i;
487
    word8 m = -(word8)c;
488
    word8 t[56];
489
490
    for (i = 0; i < 56; i++)
491
        t[i] = m & (a[i] ^ b[i]);
492
    for (i = 0; i < 56; i++)
493
        a[i] ^= t[i];
494
}
495
496
#endif /* HAVE_ED448 */
497
#elif defined(CURVED448_128BIT)
498
499
/* Initialize the field element operations.
500
 */
501
void fe448_init(void)
502
1.44k
{
503
1.44k
}
504
505
/* Convert the field element from a byte array to an array of 56-bits.
506
 *
507
 * r  [in]  Array to encode into.
508
 * b  [in]  Byte array.
509
 */
510
void fe448_from_bytes(sword64* r, const unsigned char* b)
511
638
{
512
638
    r[ 0] =  ((sword64) (b[ 0]) <<  0)
513
638
          |  ((sword64) (b[ 1]) <<  8)
514
638
          |  ((sword64) (b[ 2]) << 16)
515
638
          |  ((sword64) (b[ 3]) << 24)
516
638
          |  ((sword64) (b[ 4]) << 32)
517
638
          |  ((sword64) (b[ 5]) << 40)
518
638
          |  ((sword64) (b[ 6]) << 48);
519
638
    r[ 1] =  ((sword64) (b[ 7]) <<  0)
520
638
          |  ((sword64) (b[ 8]) <<  8)
521
638
          |  ((sword64) (b[ 9]) << 16)
522
638
          |  ((sword64) (b[10]) << 24)
523
638
          |  ((sword64) (b[11]) << 32)
524
638
          |  ((sword64) (b[12]) << 40)
525
638
          |  ((sword64) (b[13]) << 48);
526
638
    r[ 2] =  ((sword64) (b[14]) <<  0)
527
638
          |  ((sword64) (b[15]) <<  8)
528
638
          |  ((sword64) (b[16]) << 16)
529
638
          |  ((sword64) (b[17]) << 24)
530
638
          |  ((sword64) (b[18]) << 32)
531
638
          |  ((sword64) (b[19]) << 40)
532
638
          |  ((sword64) (b[20]) << 48);
533
638
    r[ 3] =  ((sword64) (b[21]) <<  0)
534
638
          |  ((sword64) (b[22]) <<  8)
535
638
          |  ((sword64) (b[23]) << 16)
536
638
          |  ((sword64) (b[24]) << 24)
537
638
          |  ((sword64) (b[25]) << 32)
538
638
          |  ((sword64) (b[26]) << 40)
539
638
          |  ((sword64) (b[27]) << 48);
540
638
    r[ 4] =  ((sword64) (b[28]) <<  0)
541
638
          |  ((sword64) (b[29]) <<  8)
542
638
          |  ((sword64) (b[30]) << 16)
543
638
          |  ((sword64) (b[31]) << 24)
544
638
          |  ((sword64) (b[32]) << 32)
545
638
          |  ((sword64) (b[33]) << 40)
546
638
          |  ((sword64) (b[34]) << 48);
547
638
    r[ 5] =  ((sword64) (b[35]) <<  0)
548
638
          |  ((sword64) (b[36]) <<  8)
549
638
          |  ((sword64) (b[37]) << 16)
550
638
          |  ((sword64) (b[38]) << 24)
551
638
          |  ((sword64) (b[39]) << 32)
552
638
          |  ((sword64) (b[40]) << 40)
553
638
          |  ((sword64) (b[41]) << 48);
554
638
    r[ 6] =  ((sword64) (b[42]) <<  0)
555
638
          |  ((sword64) (b[43]) <<  8)
556
638
          |  ((sword64) (b[44]) << 16)
557
638
          |  ((sword64) (b[45]) << 24)
558
638
          |  ((sword64) (b[46]) << 32)
559
638
          |  ((sword64) (b[47]) << 40)
560
638
          |  ((sword64) (b[48]) << 48);
561
638
    r[ 7] =  ((sword64) (b[49]) <<  0)
562
638
          |  ((sword64) (b[50]) <<  8)
563
638
          |  ((sword64) (b[51]) << 16)
564
638
          |  ((sword64) (b[52]) << 24)
565
638
          |  ((sword64) (b[53]) << 32)
566
638
          |  ((sword64) (b[54]) << 40)
567
638
          |  ((sword64) (b[55]) << 48);
568
638
}
569
570
/* Convert the field element to a byte array from an array of 56-bits.
571
 *
572
 * b  [in]  Byte array.
573
 * a  [in]  Array to encode into.
574
 */
575
void fe448_to_bytes(unsigned char* b, const sword64* a)
576
4.82k
{
577
4.82k
    sword128 t;
578
    /* Mod */
579
4.82k
    sword64 in0 = a[0];
580
4.82k
    sword64 in1 = a[1];
581
4.82k
    sword64 in2 = a[2];
582
4.82k
    sword64 in3 = a[3];
583
4.82k
    sword64 in4 = a[4];
584
4.82k
    sword64 in5 = a[5];
585
4.82k
    sword64 in6 = a[6];
586
4.82k
    sword64 in7 = a[7];
587
4.82k
    sword64 o = in7 >> 56;
588
4.82k
    in7 -= o << 56;
589
4.82k
    in0 += o;
590
4.82k
    in4 += o;
591
4.82k
    o = (in0 + 1) >> 56;
592
4.82k
    o = (o + in1) >> 56;
593
4.82k
    o = (o + in2) >> 56;
594
4.82k
    o = (o + in3) >> 56;
595
4.82k
    o = (o + in4 + 1) >> 56;
596
4.82k
    o = (o + in5) >> 56;
597
4.82k
    o = (o + in6) >> 56;
598
4.82k
    o = (o + in7) >> 56;
599
4.82k
    in0 += o;
600
4.82k
    in4 += o;
601
4.82k
    in7 -= o << 56;
602
4.82k
    o = (in0  >> 56); in1  += o; t = o << 56; in0  -= (sword64)t;
603
4.82k
    o = (in1  >> 56); in2  += o; t = o << 56; in1  -= (sword64)t;
604
4.82k
    o = (in2  >> 56); in3  += o; t = o << 56; in2  -= (sword64)t;
605
4.82k
    o = (in3  >> 56); in4  += o; t = o << 56; in3  -= (sword64)t;
606
4.82k
    o = (in4  >> 56); in5  += o; t = o << 56; in4  -= (sword64)t;
607
4.82k
    o = (in5  >> 56); in6  += o; t = o << 56; in5  -= (sword64)t;
608
4.82k
    o = (in6  >> 56); in7  += o; t = o << 56; in6  -= (sword64)t;
609
4.82k
    o = (in7  >> 56); in0  += o;
610
4.82k
                    in4  += o; t = o << 56; in7  -= (sword64)t;
611
612
    /* Output as bytes */
613
4.82k
    b[ 0] = (in0  >>  0);
614
4.82k
    b[ 1] = (in0  >>  8);
615
4.82k
    b[ 2] = (in0  >> 16);
616
4.82k
    b[ 3] = (in0  >> 24);
617
4.82k
    b[ 4] = (in0  >> 32);
618
4.82k
    b[ 5] = (in0  >> 40);
619
4.82k
    b[ 6] = (in0  >> 48);
620
4.82k
    b[ 7] = (in1  >>  0);
621
4.82k
    b[ 8] = (in1  >>  8);
622
4.82k
    b[ 9] = (in1  >> 16);
623
4.82k
    b[10] = (in1  >> 24);
624
4.82k
    b[11] = (in1  >> 32);
625
4.82k
    b[12] = (in1  >> 40);
626
4.82k
    b[13] = (in1  >> 48);
627
4.82k
    b[14] = (in2  >>  0);
628
4.82k
    b[15] = (in2  >>  8);
629
4.82k
    b[16] = (in2  >> 16);
630
4.82k
    b[17] = (in2  >> 24);
631
4.82k
    b[18] = (in2  >> 32);
632
4.82k
    b[19] = (in2  >> 40);
633
4.82k
    b[20] = (in2  >> 48);
634
4.82k
    b[21] = (in3  >>  0);
635
4.82k
    b[22] = (in3  >>  8);
636
4.82k
    b[23] = (in3  >> 16);
637
4.82k
    b[24] = (in3  >> 24);
638
4.82k
    b[25] = (in3  >> 32);
639
4.82k
    b[26] = (in3  >> 40);
640
4.82k
    b[27] = (in3  >> 48);
641
4.82k
    b[28] = (in4  >>  0);
642
4.82k
    b[29] = (in4  >>  8);
643
4.82k
    b[30] = (in4  >> 16);
644
4.82k
    b[31] = (in4  >> 24);
645
4.82k
    b[32] = (in4  >> 32);
646
4.82k
    b[33] = (in4  >> 40);
647
4.82k
    b[34] = (in4  >> 48);
648
4.82k
    b[35] = (in5  >>  0);
649
4.82k
    b[36] = (in5  >>  8);
650
4.82k
    b[37] = (in5  >> 16);
651
4.82k
    b[38] = (in5  >> 24);
652
4.82k
    b[39] = (in5  >> 32);
653
4.82k
    b[40] = (in5  >> 40);
654
4.82k
    b[41] = (in5  >> 48);
655
4.82k
    b[42] = (in6  >>  0);
656
4.82k
    b[43] = (in6  >>  8);
657
4.82k
    b[44] = (in6  >> 16);
658
4.82k
    b[45] = (in6  >> 24);
659
4.82k
    b[46] = (in6  >> 32);
660
4.82k
    b[47] = (in6  >> 40);
661
4.82k
    b[48] = (in6  >> 48);
662
4.82k
    b[49] = (in7  >>  0);
663
4.82k
    b[50] = (in7  >>  8);
664
4.82k
    b[51] = (in7  >> 16);
665
4.82k
    b[52] = (in7  >> 24);
666
4.82k
    b[53] = (in7  >> 32);
667
4.82k
    b[54] = (in7  >> 40);
668
4.82k
    b[55] = (in7  >> 48);
669
4.82k
}
670
671
/* Set the field element to 0.
672
 *
673
 * a  [in]  Field element.
674
 */
675
void fe448_1(sword64* a)
676
175k
{
677
175k
    a[0] = 1;
678
175k
    a[1] = 0;
679
175k
    a[2] = 0;
680
175k
    a[3] = 0;
681
175k
    a[4] = 0;
682
175k
    a[5] = 0;
683
175k
    a[6] = 0;
684
175k
    a[7] = 0;
685
175k
}
686
687
/* Set the field element to 0.
688
 *
689
 * a  [in]  Field element.
690
 */
691
void fe448_0(sword64* a)
692
172k
{
693
172k
    a[0] = 0;
694
172k
    a[1] = 0;
695
172k
    a[2] = 0;
696
172k
    a[3] = 0;
697
172k
    a[4] = 0;
698
172k
    a[5] = 0;
699
172k
    a[6] = 0;
700
172k
    a[7] = 0;
701
172k
}
702
703
/* Copy one field element into another: d = a.
704
 *
705
 * d  [in]  Destination field element.
706
 * a  [in]  Source field element.
707
 */
708
void fe448_copy(sword64* d, const sword64* a)
709
4.30k
{
710
4.30k
    d[0] = a[0];
711
4.30k
    d[1] = a[1];
712
4.30k
    d[2] = a[2];
713
4.30k
    d[3] = a[3];
714
4.30k
    d[4] = a[4];
715
4.30k
    d[5] = a[5];
716
4.30k
    d[6] = a[6];
717
4.30k
    d[7] = a[7];
718
4.30k
}
719
720
/* Conditionally swap the elements.
721
 * Constant time implementation.
722
 *
723
 * a  [in]  First field element.
724
 * b  [in]  Second field element.
725
 * c  [in]  Swap when 1. Valid values: 0, 1.
726
 */
727
static void fe448_cswap(sword64* a, sword64* b, int c)
728
181k
{
729
181k
    sword64 mask = -(sword64)c;
730
181k
    sword64 t0 = (a[0] ^ b[0]) & mask;
731
181k
    sword64 t1 = (a[1] ^ b[1]) & mask;
732
181k
    sword64 t2 = (a[2] ^ b[2]) & mask;
733
181k
    sword64 t3 = (a[3] ^ b[3]) & mask;
734
181k
    sword64 t4 = (a[4] ^ b[4]) & mask;
735
181k
    sword64 t5 = (a[5] ^ b[5]) & mask;
736
181k
    sword64 t6 = (a[6] ^ b[6]) & mask;
737
181k
    sword64 t7 = (a[7] ^ b[7]) & mask;
738
181k
    a[0] ^= t0;
739
181k
    a[1] ^= t1;
740
181k
    a[2] ^= t2;
741
181k
    a[3] ^= t3;
742
181k
    a[4] ^= t4;
743
181k
    a[5] ^= t5;
744
181k
    a[6] ^= t6;
745
181k
    a[7] ^= t7;
746
181k
    b[0] ^= t0;
747
181k
    b[1] ^= t1;
748
181k
    b[2] ^= t2;
749
181k
    b[3] ^= t3;
750
181k
    b[4] ^= t4;
751
181k
    b[5] ^= t5;
752
181k
    b[6] ^= t6;
753
181k
    b[7] ^= t7;
754
181k
}
755
756
/* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
757
 *
758
 * r  [in]  Field element to hold sum.
759
 * a  [in]  Field element to add.
760
 * b  [in]  Field element to add.
761
 */
762
void fe448_add(sword64* r, const sword64* a, const sword64* b)
763
1.52M
{
764
1.52M
    r[0] = a[0] + b[0];
765
1.52M
    r[1] = a[1] + b[1];
766
1.52M
    r[2] = a[2] + b[2];
767
1.52M
    r[3] = a[3] + b[3];
768
1.52M
    r[4] = a[4] + b[4];
769
1.52M
    r[5] = a[5] + b[5];
770
1.52M
    r[6] = a[6] + b[6];
771
1.52M
    r[7] = a[7] + b[7];
772
1.52M
}
773
774
/* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
775
 *
776
 * r  [in]  Field element to hold difference.
777
 * a  [in]  Field element to subtract from.
778
 * b  [in]  Field element to subtract.
779
 */
780
void fe448_sub(sword64* r, const sword64* a, const sword64* b)
781
1.70M
{
782
1.70M
    r[0] = a[0] - b[0];
783
1.70M
    r[1] = a[1] - b[1];
784
1.70M
    r[2] = a[2] - b[2];
785
1.70M
    r[3] = a[3] - b[3];
786
1.70M
    r[4] = a[4] - b[4];
787
1.70M
    r[5] = a[5] - b[5];
788
1.70M
    r[6] = a[6] - b[6];
789
1.70M
    r[7] = a[7] - b[7];
790
1.70M
}
791
792
/* Mulitply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
793
 *
794
 * r  [in]  Field element to hold result.
795
 * a  [in]  Field element to multiply.
796
 */
797
void fe448_mul39081(sword64* r, const sword64* a)
798
309k
{
799
309k
    sword128 t;
800
309k
    sword64 o;
801
309k
    sword128 t0 = a[0] * (sword128)39081;
802
309k
    sword128 t1 = a[1] * (sword128)39081;
803
309k
    sword128 t2 = a[2] * (sword128)39081;
804
309k
    sword128 t3 = a[3] * (sword128)39081;
805
309k
    sword128 t4 = a[4] * (sword128)39081;
806
309k
    sword128 t5 = a[5] * (sword128)39081;
807
309k
    sword128 t6 = a[6] * (sword128)39081;
808
309k
    sword128 t7 = a[7] * (sword128)39081;
809
309k
    o = (sword64)(t0  >> 56); t1  += o; t = (sword128)o << 56; t0  -= t;
810
309k
    o = (sword64)(t1  >> 56); t2  += o; t = (sword128)o << 56; t1  -= t;
811
309k
    o = (sword64)(t2  >> 56); t3  += o; t = (sword128)o << 56; t2  -= t;
812
309k
    o = (sword64)(t3  >> 56); t4  += o; t = (sword128)o << 56; t3  -= t;
813
309k
    o = (sword64)(t4  >> 56); t5  += o; t = (sword128)o << 56; t4  -= t;
814
309k
    o = (sword64)(t5  >> 56); t6  += o; t = (sword128)o << 56; t5  -= t;
815
309k
    o = (sword64)(t6  >> 56); t7  += o; t = (sword128)o << 56; t6  -= t;
816
309k
    o = (sword64)(t7  >> 56); t0  += o;
817
309k
                   t4  += o; t = (sword128)o << 56; t7  -= t;
818
819
    /* Store */
820
309k
    r[0] = (sword64)t0;
821
309k
    r[1] = (sword64)t1;
822
309k
    r[2] = (sword64)t2;
823
309k
    r[3] = (sword64)t3;
824
309k
    r[4] = (sword64)t4;
825
309k
    r[5] = (sword64)t5;
826
309k
    r[6] = (sword64)t6;
827
309k
    r[7] = (sword64)t7;
828
309k
}
829
830
/* Mulitply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
831
 *
832
 * r  [in]  Field element to hold result.
833
 * a  [in]  Field element to multiply.
834
 * b  [in]  Field element to multiply.
835
 */
836
void fe448_mul(sword64* r, const sword64* a, const sword64* b)
837
2.98M
{
838
2.98M
    sword64 o;
839
2.98M
    sword64 a1[4];
840
2.98M
    sword64 b1[4];
841
2.98M
    sword128 t0;
842
2.98M
    sword128 t1;
843
2.98M
    sword128 t2;
844
2.98M
    sword128 t3;
845
2.98M
    sword128 t4;
846
2.98M
    sword128 t5;
847
2.98M
    sword128 t6;
848
2.98M
    sword128 t7;
849
2.98M
    sword128 t03;
850
2.98M
    sword128 t04;
851
2.98M
    sword128 t13;
852
2.98M
    sword128 t14;
853
854
2.98M
    a1[0] = a[0] + a[4];
855
2.98M
    a1[1] = a[1] + a[5];
856
2.98M
    a1[2] = a[2] + a[6];
857
2.98M
    a1[3] = a[3] + a[7];
858
2.98M
    b1[0] = b[0] + b[4];
859
2.98M
    b1[1] = b[1] + b[5];
860
2.98M
    b1[2] = b[2] + b[6];
861
2.98M
    b1[3] = b[3] + b[7];
862
863
2.98M
    t03 = ((sword128)a[0] * b[3]) + ((sword128)a[1] * b[2])
864
2.98M
       + ((sword128)a[2] * b[1]) + ((sword128)a[3] * b[0]);
865
2.98M
    t04 = ((sword128)a[1] * b[3]) + ((sword128)a[2] * b[2])
866
2.98M
       + ((sword128)a[3] * b[1]);
867
2.98M
    t04 += t03 >> 56;
868
2.98M
    t03 &= 0xffffffffffffffL;
869
2.98M
    t13 = ((sword128)a1[0] * b1[3]) + ((sword128)a1[1] * b1[2])
870
2.98M
       + ((sword128)a1[2] * b1[1]) + ((sword128)a1[3] * b1[0]);
871
2.98M
    t14 = ((sword128)a1[1] * b1[3]) + ((sword128)a1[2] * b1[2])
872
2.98M
       + ((sword128)a1[3] * b1[1]);
873
2.98M
    t14 += t13 >> 56;
874
2.98M
    t13 &= 0xffffffffffffffL;
875
876
2.98M
    t0 = ((sword128)a[0] * b[0]) + ((sword128)a[4] * b[4]) + t14 + -t04;
877
2.98M
    t1 = ((sword128)a[0] * b[1]) + ((sword128)a[1] * b[0])
878
2.98M
       + ((sword128)a[4] * b[5]) + ((sword128)a[5] * b[4])
879
2.98M
       + ((sword128)a1[2] * b1[3]) + ((sword128)a1[3] * b1[2])
880
2.98M
       - ((sword128)a[2] * b[3]) - ((sword128)a[3] * b[2]);
881
2.98M
    o = (sword64)(t0  >> 56); t1 += o; t0 &= 0xffffffffffffffL;
882
2.98M
    t2 = ((sword128)a[0] * b[2]) + ((sword128)a[1] * b[1])
883
2.98M
       + ((sword128)a[2] * b[0]) + ((sword128)a[4] * b[6])
884
2.98M
       + ((sword128)a[5] * b[5]) + ((sword128)a[6] * b[4])
885
2.98M
       + ((sword128)a1[3] * b1[3]) - ((sword128)a[3] * b[3]);
886
2.98M
    o = (sword64)(t1  >> 56); t2 += o; t1 &= 0xffffffffffffffL;
887
2.98M
    t3 = t03 + ((sword128)a[4] * b[7]) + ((sword128)a[5] * b[6])
888
2.98M
       + ((sword128)a[6] * b[5]) + ((sword128)a[7] * b[4]);
889
2.98M
    o = (sword64)(t2  >> 56); t3 += o; t2 &= 0xffffffffffffffL;
890
2.98M
    t4 = ((sword128)a[5] * b[7]) + ((sword128)a[6] * b[6])
891
2.98M
       + ((sword128)a[7] * b[5]) + ((sword128)a1[0] * b1[0])
892
2.98M
       - ((sword128)a[0] * b[0]) + t14;
893
2.98M
    o = (sword64)(t3  >> 56); t4 += o; t3 &= 0xffffffffffffffL;
894
2.98M
    t5 = ((sword128)a[6] * b[7]) + ((sword128)a[7] * b[6])
895
2.98M
       + ((sword128)a1[0] * b1[1]) + ((sword128)a1[1] * b1[0])
896
2.98M
       - ((sword128)a[0] * b[1]) - ((sword128)a[1] * b[0])
897
2.98M
       + ((sword128)a1[2] * b1[3]) + ((sword128)a1[3] * b1[2]);
898
2.98M
    o = (sword64)(t4  >> 56); t5 += o; t4 &= 0xffffffffffffffL;
899
2.98M
    t6 = ((sword128)a[7] * b[7]) + ((sword128)a1[0] * b1[2])
900
2.98M
       + ((sword128)a1[1] * b1[1]) + ((sword128)a1[2] * b1[0])
901
2.98M
       - ((sword128)a[0] * b[2]) - ((sword128)a[1] * b[1])
902
2.98M
       - ((sword128)a[2] * b[0]) + ((sword128)a1[3] * b1[3]);
903
2.98M
    o = (sword64)(t5  >> 56); t6 += o; t5 &= 0xffffffffffffffL;
904
2.98M
    t7 = t13 + -t03;
905
2.98M
    o = (sword64)(t6  >> 56); t7 += o; t6 &= 0xffffffffffffffL;
906
2.98M
    o = (sword64)(t7 >> 56); t0  += o;
907
2.98M
                   t4  += o; t7 &= 0xffffffffffffffL;
908
909
    /* Store */
910
2.98M
    r[0] = (sword64)t0;
911
2.98M
    r[1] = (sword64)t1;
912
2.98M
    r[2] = (sword64)t2;
913
2.98M
    r[3] = (sword64)t3;
914
2.98M
    r[4] = (sword64)t4;
915
2.98M
    r[5] = (sword64)t5;
916
2.98M
    r[6] = (sword64)t6;
917
2.98M
    r[7] = (sword64)t7;
918
2.98M
}
919
920
/* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
921
 *
922
 * r  [in]  Field element to hold result.
923
 * a  [in]  Field element to square.
924
 */
925
void fe448_sqr(sword64* r, const sword64* a)
926
2.36M
{
927
2.36M
    sword64 o;
928
2.36M
    sword64 a1[4];
929
2.36M
    sword128 t0;
930
2.36M
    sword128 t1;
931
2.36M
    sword128 t2;
932
2.36M
    sword128 t3;
933
2.36M
    sword128 t4;
934
2.36M
    sword128 t5;
935
2.36M
    sword128 t6;
936
2.36M
    sword128 t7;
937
2.36M
    sword128 t03;
938
2.36M
    sword128 t04;
939
2.36M
    sword128 t13;
940
2.36M
    sword128 t14;
941
942
2.36M
    a1[0] = a[0] + a[4];
943
2.36M
    a1[1] = a[1] + a[5];
944
2.36M
    a1[2] = a[2] + a[6];
945
2.36M
    a1[3] = a[3] + a[7];
946
947
2.36M
    t03 = ((sword128)a[0] * (2 * a[3])) + ((sword128)a[1] * (2 * a[2]));
948
2.36M
    t04 = ((sword128)a[1] * (2 * a[3])) + ((sword128)a[2] * a[2]);
949
2.36M
    t04 += t03 >> 56;
950
2.36M
    t03 &= 0xffffffffffffffL;
951
2.36M
    t13 = ((sword128)a1[0] * (2 * a1[3])) + ((sword128)a1[1] * (2 * a1[2]));
952
2.36M
    t14 = ((sword128)a1[1] * (2 * a1[3])) + ((sword128)a1[2] * a1[2]);
953
2.36M
    t14 += t13 >> 56;
954
2.36M
    t13 &= 0xffffffffffffffL;
955
956
2.36M
    t0 = ((sword128)a[0] * a[0]) + ((sword128)a[4] * a[4]) + t14 + -t04;
957
2.36M
    t1 = ((sword128)a[0] * (2 * a[1])) + ((sword128)a[4] * (2 * a[5]))
958
2.36M
       + ((sword128)a1[2] * (2 * a1[3])) - ((sword128)a[2] * (2 * a[3]));
959
2.36M
    o = (sword64)(t0  >> 56); t1 += o; t0 &= 0xffffffffffffffL;
960
2.36M
    t2 = ((sword128)a[0] * (2 * a[2])) + ((sword128)a[1] * a[1])
961
2.36M
       + ((sword128)a[4] * (2 * a[6])) + ((sword128)a[5] * a[5])
962
2.36M
       + ((sword128)a1[3] * a1[3]) - ((sword128)a[3] * a[3]);
963
2.36M
    o = (sword64)(t1  >> 56); t2 += o; t1 &= 0xffffffffffffffL;
964
2.36M
    t3 = t03 + ((sword128)a[4] * (2 * a[7])) + ((sword128)a[5] * (2 * a[6]));
965
2.36M
    o = (sword64)(t2  >> 56); t3 += o; t2 &= 0xffffffffffffffL;
966
2.36M
    t4 = ((sword128)a[5] * (2 * a[7])) + ((sword128)a[6] * a[6])
967
2.36M
       + ((sword128)a1[0] * a1[0]) - ((sword128)a[0] * a[0]) + t14;
968
2.36M
    o = (sword64)(t3  >> 56); t4 += o; t3 &= 0xffffffffffffffL;
969
2.36M
    t5 = ((sword128)a[6] * (2 * a[7])) + ((sword128)a1[0] * (2 * a1[1]))
970
2.36M
       - ((sword128)a[0] * (2 * a[1])) + ((sword128)a1[2] * (2 * a1[3]));
971
2.36M
    o = (sword64)(t4  >> 56); t5 += o; t4 &= 0xffffffffffffffL;
972
2.36M
    t6 = ((sword128)a[7] * a[7]) + ((sword128)a1[0] * (2 * a1[2]))
973
2.36M
       + ((sword128)a1[1] * a1[1]) - ((sword128)a[0] * (2 * a[2]))
974
2.36M
       - ((sword128)a[1] * a[1]) + ((sword128)a1[3] * a1[3]);
975
2.36M
    o = (sword64)(t5  >> 56); t6 += o; t5 &= 0xffffffffffffffL;
976
2.36M
    t7 = t13 + -t03;
977
2.36M
    o = (sword64)(t6  >> 56); t7 += o; t6 &= 0xffffffffffffffL;
978
2.36M
    o = (sword64)(t7 >> 56); t0  += o;
979
2.36M
                   t4  += o; t7 &= 0xffffffffffffffL;
980
981
    /* Store */
982
2.36M
    r[0] = (sword64)t0;
983
2.36M
    r[1] = (sword64)t1;
984
2.36M
    r[2] = (sword64)t2;
985
2.36M
    r[3] = (sword64)t3;
986
2.36M
    r[4] = (sword64)t4;
987
2.36M
    r[5] = (sword64)t5;
988
2.36M
    r[6] = (sword64)t6;
989
2.36M
    r[7] = (sword64)t7;
990
2.36M
}
991
992
/* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
993
 * Constant time implementation - using Fermat's little theorem:
994
 *   a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
995
 * For curve448: p - 2 = 2^448 - 2^224 - 3
996
 *
997
 * r  [in]  Field element to hold result.
998
 * a  [in]  Field element to invert.
999
 */
1000
void fe448_invert(sword64* r, const sword64* a)
1001
2.07k
{
1002
2.07k
    sword64 t1[8];
1003
2.07k
    sword64 t2[8];
1004
2.07k
    sword64 t3[8];
1005
2.07k
    sword64 t4[8];
1006
2.07k
    int i;
1007
1008
2.07k
    fe448_sqr(t1, a);
1009
    /* t1 = 2 */
1010
2.07k
    fe448_mul(t1, t1, a);
1011
    /* t1 = 3 */
1012
4.15k
    fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
1013
    /* t2 = c */
1014
2.07k
    fe448_mul(t3, t2, a);
1015
    /* t3 = d */
1016
2.07k
    fe448_mul(t1, t2, t1);
1017
    /* t1 = f */
1018
2.07k
    fe448_sqr(t2, t1);
1019
    /* t2 = 1e */
1020
2.07k
    fe448_mul(t4, t2, a);
1021
    /* t4 = 1f */
1022
10.3k
    fe448_sqr(t2, t4); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
1023
    /* t2 = 3e0 */
1024
2.07k
    fe448_mul(t1, t2, t4);
1025
    /* t1 = 3ff */
1026
20.7k
    fe448_sqr(t2, t1); for (i = 1; i < 10; ++i) fe448_sqr(t2, t2);
1027
    /* t2 = ffc00 */
1028
2.07k
    fe448_mul(t1, t2, t1);
1029
    /* t1 = fffff */
1030
10.3k
    fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
1031
    /* t2 = 1ffffe0 */
1032
2.07k
    fe448_mul(t1, t2, t4);
1033
    /* t1 = 1ffffff */
1034
51.9k
    fe448_sqr(t2, t1); for (i = 1; i < 25; ++i) fe448_sqr(t2, t2);
1035
    /* t2 = 3fffffe000000 */
1036
2.07k
    fe448_mul(t1, t2, t1);
1037
    /* t1 = 3ffffffffffff */
1038
10.3k
    fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
1039
    /* t2 = 7fffffffffffe0 */
1040
2.07k
    fe448_mul(t1, t2, t4);
1041
    /* t1 = 7fffffffffffff */
1042
114k
    fe448_sqr(t2, t1); for (i = 1; i < 55; ++i) fe448_sqr(t2, t2);
1043
    /* t2 = 3fffffffffffff80000000000000 */
1044
2.07k
    fe448_mul(t1, t2, t1);
1045
    /* t1 = 3fffffffffffffffffffffffffff */
1046
228k
    fe448_sqr(t2, t1); for (i = 1; i < 110; ++i) fe448_sqr(t2, t2);
1047
    /* t2 = fffffffffffffffffffffffffffc000000000000000000000000000 */
1048
2.07k
    fe448_mul(t1, t2, t1);
1049
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff */
1050
8.31k
    fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
1051
    /* t2 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff0 */
1052
2.07k
    fe448_mul(t3, t3, t2);
1053
    /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
1054
2.07k
    fe448_mul(t1, t3, a);
1055
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
1056
465k
    fe448_sqr(t1, t1); for (i = 1; i < 224; ++i) fe448_sqr(t1, t1);
1057
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000000000000000000000000000000000 */
1058
2.07k
    fe448_mul(r, t3, t1);
1059
    /* r = fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
1060
2.07k
}
1061
1062
/* Scalar multiply the point by a number. r = n.a
1063
 * Uses Montgomery ladder and only requires the x-ordinate.
1064
 *
1065
 * r  [in]  Field element to hold result.
1066
 * n  [in]  Scalar as an array of bytes.
1067
 * a  [in]  Point to multiply - x-ordinate only.
1068
 */
1069
int curve448(byte* r, const byte* n, const byte* a)
1070
203
{
1071
203
    sword64 x1[8];
1072
203
    sword64 x2[8];
1073
203
    sword64 z2[8];
1074
203
    sword64 x3[8];
1075
203
    sword64 z3[8];
1076
203
    sword64 t0[8];
1077
203
    sword64 t1[8];
1078
203
    int i;
1079
203
    unsigned int swap;
1080
203
    unsigned int b;
1081
1082
203
    fe448_from_bytes(x1, a);
1083
203
    fe448_1(x2);
1084
203
    fe448_0(z2);
1085
203
    fe448_copy(x3, x1);
1086
203
    fe448_1(z3);
1087
1088
203
    swap = 0;
1089
91.1k
    for (i = 447; i >= 0; --i) {
1090
90.9k
        b = (n[i >> 3] >> (i & 7)) & 1;
1091
90.9k
        swap ^= b;
1092
90.9k
        fe448_cswap(x2, x3, swap);
1093
90.9k
        fe448_cswap(z2, z3, swap);
1094
90.9k
        swap = b;
1095
1096
        /* Montgomery Ladder - double and add */
1097
90.9k
        fe448_add(t0, x2, z2);
1098
90.9k
        fe448_reduce(t0);
1099
90.9k
        fe448_add(t1, x3, z3);
1100
90.9k
        fe448_reduce(t1);
1101
90.9k
        fe448_sub(x2, x2, z2);
1102
90.9k
        fe448_sub(x3, x3, z3);
1103
90.9k
        fe448_mul(t1, t1, x2);
1104
90.9k
        fe448_mul(z3, x3, t0);
1105
90.9k
        fe448_sqr(t0, t0);
1106
90.9k
        fe448_sqr(x2, x2);
1107
90.9k
        fe448_add(x3, z3, t1);
1108
90.9k
        fe448_reduce(x3);
1109
90.9k
        fe448_sqr(x3, x3);
1110
90.9k
        fe448_sub(z3, z3, t1);
1111
90.9k
        fe448_sqr(z3, z3);
1112
90.9k
        fe448_mul(z3, z3, x1);
1113
90.9k
        fe448_sub(t1, t0, x2);
1114
90.9k
        fe448_mul(x2, t0, x2);
1115
90.9k
        fe448_mul39081(z2, t1);
1116
90.9k
        fe448_add(z2, t0, z2);
1117
90.9k
        fe448_mul(z2, z2, t1);
1118
90.9k
    }
1119
    /* Last two bits are 0 - no final swap check required. */
1120
1121
203
    fe448_invert(z2, z2);
1122
203
    fe448_mul(x2, x2, z2);
1123
203
    fe448_to_bytes(r, x2);
1124
1125
203
    return 0;
1126
203
}
1127
1128
#ifdef HAVE_ED448
1129
/* Check whether field element is not 0.
1130
 * Must convert to a normalized form before checking.
1131
 *
1132
 * a  [in]  Field element.
1133
 * returns 0 when zero, and any other value otherwise.
1134
 */
1135
int fe448_isnonzero(const sword64* a)
1136
435
{
1137
435
    byte b[56];
1138
435
    int i;
1139
435
    byte c = 0;
1140
435
    fe448_to_bytes(b, a);
1141
24.7k
    for (i = 0; i < 56; i++)
1142
24.3k
        c |= b[i];
1143
435
    return c;
1144
435
}
1145
1146
/* Check whether field element is negative.
1147
 * Must convert to a normalized form before checking.
1148
 *
1149
 * a  [in]  Field element.
1150
 * returns 1 when negative, and 0 otherwise.
1151
 */
1152
int fe448_isnegative(const sword64* a)
1153
2.31k
{
1154
2.31k
    byte b[56];
1155
2.31k
    fe448_to_bytes(b, a);
1156
2.31k
    return b[0] & 1;
1157
2.31k
}
1158
1159
/* Negates the field element. r = -a
1160
 *
1161
 * r  [in]  Field element to hold result.
1162
 * a  [in]  Field element.
1163
 */
1164
void fe448_neg(sword64* r, const sword64* a)
1165
173k
{
1166
173k
    r[0] = -a[0];
1167
173k
    r[1] = -a[1];
1168
173k
    r[2] = -a[2];
1169
173k
    r[3] = -a[3];
1170
173k
    r[4] = -a[4];
1171
173k
    r[5] = -a[5];
1172
173k
    r[6] = -a[6];
1173
173k
    r[7] = -a[7];
1174
173k
}
1175
1176
/* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
1177
 * Used for calcualting y-ordinate from x-ordinate for Ed448.
1178
 *
1179
 * r  [in]  Field element to hold result.
1180
 * a  [in]  Field element to exponentiate.
1181
 */
1182
void fe448_pow_2_446_222_1(sword64* r, const sword64* a)
1183
435
{
1184
435
    sword64 t1[8];
1185
435
    sword64 t2[8];
1186
435
    sword64 t3[8];
1187
435
    sword64 t4[8];
1188
435
    sword64 t5[8];
1189
435
    int i;
1190
1191
435
    fe448_sqr(t3, a);
1192
    /* t3 = 2 */
1193
435
    fe448_mul(t1, t3, a);
1194
    /* t1 = 3 */
1195
435
    fe448_sqr(t5, t1);
1196
    /* t5 = 6 */
1197
435
    fe448_mul(t5, t5, a);
1198
    /* t5 = 7 */
1199
870
    fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
1200
    /* t2 = c */
1201
435
    fe448_mul(t3, t2, t3);
1202
    /* t3 = e */
1203
435
    fe448_mul(t1, t2, t1);
1204
    /* t1 = f */
1205
1.30k
    fe448_sqr(t2, t1); for (i = 1; i < 3; ++i) fe448_sqr(t2, t2);
1206
    /* t2 = 78 */
1207
435
    fe448_mul(t5, t2, t5);
1208
    /* t5 = 7f */
1209
1.74k
    fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
1210
    /* t2 = f0 */
1211
435
    fe448_mul(t1, t2, t1);
1212
    /* t1 = ff */
1213
435
    fe448_mul(t3, t3, t2);
1214
    /* t3 = fe */
1215
3.04k
    fe448_sqr(t2, t1); for (i = 1; i < 7; ++i) fe448_sqr(t2, t2);
1216
    /* t2 = 7f80 */
1217
435
    fe448_mul(t5, t2, t5);
1218
    /* t5 = 7fff */
1219
3.48k
    fe448_sqr(t2, t1); for (i = 1; i < 8; ++i) fe448_sqr(t2, t2);
1220
    /* t2 = ff00 */
1221
435
    fe448_mul(t1, t2, t1);
1222
    /* t1 = ffff */
1223
435
    fe448_mul(t3, t3, t2);
1224
    /* t3 = fffe */
1225
6.52k
    fe448_sqr(t2, t5); for (i = 1; i < 15; ++i) fe448_sqr(t2, t2);
1226
    /* t2 = 3fff8000 */
1227
435
    fe448_mul(t5, t2, t5);
1228
    /* t5 = 3fffffff */
1229
6.96k
    fe448_sqr(t2, t1); for (i = 1; i < 16; ++i) fe448_sqr(t2, t2);
1230
    /* t2 = ffff0000 */
1231
435
    fe448_mul(t1, t2, t1);
1232
    /* t1 = ffffffff */
1233
435
    fe448_mul(t3, t3, t2);
1234
    /* t3 = fffffffe */
1235
13.9k
    fe448_sqr(t2, t1); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
1236
    /* t2 = ffffffff00000000 */
1237
435
    fe448_mul(t2, t2, t1);
1238
    /* t2 = ffffffffffffffff */
1239
27.8k
    fe448_sqr(t1, t2); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
1240
    /* t1 = ffffffffffffffff0000000000000000 */
1241
435
    fe448_mul(t1, t1, t2);
1242
    /* t1 = ffffffffffffffffffffffffffffffff */
1243
27.8k
    fe448_sqr(t1, t1); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
1244
    /* t1 = ffffffffffffffffffffffffffffffff0000000000000000 */
1245
435
    fe448_mul(t4, t1, t2);
1246
    /* t4 = ffffffffffffffffffffffffffffffffffffffffffffffff */
1247
13.9k
    fe448_sqr(t2, t4); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
1248
    /* t2 = ffffffffffffffffffffffffffffffffffffffffffffffff00000000 */
1249
435
    fe448_mul(t3, t3, t2);
1250
    /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
1251
83.5k
    fe448_sqr(t1, t3); for (i = 1; i < 192; ++i) fe448_sqr(t1, t1);
1252
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe000000000000000000000000000000000000000000000000 */
1253
435
    fe448_mul(t1, t1, t4);
1254
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffff */
1255
13.0k
    fe448_sqr(t1, t1); for (i = 1; i < 30; ++i) fe448_sqr(t1, t1);
1256
    /* t1 = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffc0000000 */
1257
435
    fe448_mul(r, t5, t1);
1258
    /* r = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff */
1259
435
}
1260
1261
/* Constant time, conditional move of b into a.
1262
 * a is not changed if the condition is 0.
1263
 *
1264
 * a  A field element.
1265
 * b  A field element.
1266
 * c  If 1 then copy and if 0 then don't copy.
1267
 */
1268
void fe448_cmov(sword64* a, const sword64* b, int c)
1269
2.92M
{
1270
2.92M
    sword64 m = -(sword64)c;
1271
2.92M
    sword64 t0 = m & (a[0] ^ b[0]);
1272
2.92M
    sword64 t1 = m & (a[1] ^ b[1]);
1273
2.92M
    sword64 t2 = m & (a[2] ^ b[2]);
1274
2.92M
    sword64 t3 = m & (a[3] ^ b[3]);
1275
2.92M
    sword64 t4 = m & (a[4] ^ b[4]);
1276
2.92M
    sword64 t5 = m & (a[5] ^ b[5]);
1277
2.92M
    sword64 t6 = m & (a[6] ^ b[6]);
1278
2.92M
    sword64 t7 = m & (a[7] ^ b[7]);
1279
1280
2.92M
    a[0] ^= t0;
1281
2.92M
    a[1] ^= t1;
1282
2.92M
    a[2] ^= t2;
1283
2.92M
    a[3] ^= t3;
1284
2.92M
    a[4] ^= t4;
1285
2.92M
    a[5] ^= t5;
1286
2.92M
    a[6] ^= t6;
1287
2.92M
    a[7] ^= t7;
1288
2.92M
}
1289
1290
#endif /* HAVE_ED448 */
1291
#else
1292
1293
/* Initialize the field element operations.
1294
 */
1295
void fe448_init(void)
1296
{
1297
}
1298
1299
/* Convert the field element from a byte array to an array of 28-bits.
1300
 *
1301
 * r  [in]  Array to encode into.
1302
 * b  [in]  Byte array.
1303
 */
1304
void fe448_from_bytes(sword32* r, const unsigned char* b)
1305
{
1306
    r[ 0] =  (((sword32)((b[ 0]        ) >>  0)) <<  0)
1307
          |  (((sword32)((b[ 1]        ) >>  0)) <<  8)
1308
          |  (((sword32)((b[ 2]        ) >>  0)) << 16)
1309
          | ((((sword32)((b[ 3] & 0xf )) >>  0)) << 24);
1310
    r[ 1] =  (((sword32)((b[ 3]        ) >>  4)) <<  0)
1311
          |  (((sword32)((b[ 4]        ) >>  0)) <<  4)
1312
          |  (((sword32)((b[ 5]        ) >>  0)) << 12)
1313
          |  (((sword32)((b[ 6]        ) >>  0)) << 20);
1314
    r[ 2] =  (((sword32)((b[ 7]        ) >>  0)) <<  0)
1315
          |  (((sword32)((b[ 8]        ) >>  0)) <<  8)
1316
          |  (((sword32)((b[ 9]        ) >>  0)) << 16)
1317
          | ((((sword32)((b[10] & 0xf )) >>  0)) << 24);
1318
    r[ 3] =  (((sword32)((b[10]        ) >>  4)) <<  0)
1319
          |  (((sword32)((b[11]        ) >>  0)) <<  4)
1320
          |  (((sword32)((b[12]        ) >>  0)) << 12)
1321
          |  (((sword32)((b[13]        ) >>  0)) << 20);
1322
    r[ 4] =  (((sword32)((b[14]        ) >>  0)) <<  0)
1323
          |  (((sword32)((b[15]        ) >>  0)) <<  8)
1324
          |  (((sword32)((b[16]        ) >>  0)) << 16)
1325
          | ((((sword32)((b[17] & 0xf )) >>  0)) << 24);
1326
    r[ 5] =  (((sword32)((b[17]        ) >>  4)) <<  0)
1327
          |  (((sword32)((b[18]        ) >>  0)) <<  4)
1328
          |  (((sword32)((b[19]        ) >>  0)) << 12)
1329
          |  (((sword32)((b[20]        ) >>  0)) << 20);
1330
    r[ 6] =  (((sword32)((b[21]        ) >>  0)) <<  0)
1331
          |  (((sword32)((b[22]        ) >>  0)) <<  8)
1332
          |  (((sword32)((b[23]        ) >>  0)) << 16)
1333
          | ((((sword32)((b[24] & 0xf )) >>  0)) << 24);
1334
    r[ 7] =  (((sword32)((b[24]        ) >>  4)) <<  0)
1335
          |  (((sword32)((b[25]        ) >>  0)) <<  4)
1336
          |  (((sword32)((b[26]        ) >>  0)) << 12)
1337
          |  (((sword32)((b[27]        ) >>  0)) << 20);
1338
    r[ 8] =  (((sword32)((b[28]        ) >>  0)) <<  0)
1339
          |  (((sword32)((b[29]        ) >>  0)) <<  8)
1340
          |  (((sword32)((b[30]        ) >>  0)) << 16)
1341
          | ((((sword32)((b[31] & 0xf )) >>  0)) << 24);
1342
    r[ 9] =  (((sword32)((b[31]        ) >>  4)) <<  0)
1343
          |  (((sword32)((b[32]        ) >>  0)) <<  4)
1344
          |  (((sword32)((b[33]        ) >>  0)) << 12)
1345
          |  (((sword32)((b[34]        ) >>  0)) << 20);
1346
    r[10] =  (((sword32)((b[35]        ) >>  0)) <<  0)
1347
          |  (((sword32)((b[36]        ) >>  0)) <<  8)
1348
          |  (((sword32)((b[37]        ) >>  0)) << 16)
1349
          | ((((sword32)((b[38] & 0xf )) >>  0)) << 24);
1350
    r[11] =  (((sword32)((b[38]        ) >>  4)) <<  0)
1351
          |  (((sword32)((b[39]        ) >>  0)) <<  4)
1352
          |  (((sword32)((b[40]        ) >>  0)) << 12)
1353
          |  (((sword32)((b[41]        ) >>  0)) << 20);
1354
    r[12] =  (((sword32)((b[42]        ) >>  0)) <<  0)
1355
          |  (((sword32)((b[43]        ) >>  0)) <<  8)
1356
          |  (((sword32)((b[44]        ) >>  0)) << 16)
1357
          | ((((sword32)((b[45] & 0xf )) >>  0)) << 24);
1358
    r[13] =  (((sword32)((b[45]        ) >>  4)) <<  0)
1359
          |  (((sword32)((b[46]        ) >>  0)) <<  4)
1360
          |  (((sword32)((b[47]        ) >>  0)) << 12)
1361
          |  (((sword32)((b[48]        ) >>  0)) << 20);
1362
    r[14] =  (((sword32)((b[49]        ) >>  0)) <<  0)
1363
          |  (((sword32)((b[50]        ) >>  0)) <<  8)
1364
          |  (((sword32)((b[51]        ) >>  0)) << 16)
1365
          | ((((sword32)((b[52] & 0xf )) >>  0)) << 24);
1366
    r[15] =  (((sword32)((b[52]        ) >>  4)) <<  0)
1367
          |  (((sword32)((b[53]        ) >>  0)) <<  4)
1368
          |  (((sword32)((b[54]        ) >>  0)) << 12)
1369
          |  (((sword32)((b[55]        ) >>  0)) << 20);
1370
}
1371
1372
/* Convert the field element to a byte array from an array of 28-bits.
1373
 *
1374
 * b  [in]  Byte array.
1375
 * a  [in]  Array to encode into.
1376
 */
1377
void fe448_to_bytes(unsigned char* b, const sword32* a)
1378
{
1379
    sword64 t;
1380
    /* Mod */
1381
    sword32 in0 = a[0];
1382
    sword32 in1 = a[1];
1383
    sword32 in2 = a[2];
1384
    sword32 in3 = a[3];
1385
    sword32 in4 = a[4];
1386
    sword32 in5 = a[5];
1387
    sword32 in6 = a[6];
1388
    sword32 in7 = a[7];
1389
    sword32 in8 = a[8];
1390
    sword32 in9 = a[9];
1391
    sword32 in10 = a[10];
1392
    sword32 in11 = a[11];
1393
    sword32 in12 = a[12];
1394
    sword32 in13 = a[13];
1395
    sword32 in14 = a[14];
1396
    sword32 in15 = a[15];
1397
    sword32 o = in15 >> 28;
1398
    in15 -= o << 28;
1399
    in0 += o;
1400
    in8 += o;
1401
    o = (in0 + 1) >> 28;
1402
    o = (o + in1) >> 28;
1403
    o = (o + in2) >> 28;
1404
    o = (o + in3) >> 28;
1405
    o = (o + in4) >> 28;
1406
    o = (o + in5) >> 28;
1407
    o = (o + in6) >> 28;
1408
    o = (o + in7) >> 28;
1409
    o = (o + in8 + 1) >> 28;
1410
    o = (o + in9) >> 28;
1411
    o = (o + in10) >> 28;
1412
    o = (o + in11) >> 28;
1413
    o = (o + in12) >> 28;
1414
    o = (o + in13) >> 28;
1415
    o = (o + in14) >> 28;
1416
    o = (o + in15) >> 28;
1417
    in0 += o;
1418
    in8 += o;
1419
    in15 -= o << 28;
1420
    o = (in0  >> 28); in1  += o; t = o << 28; in0  -= (sword32)t;
1421
    o = (in1  >> 28); in2  += o; t = o << 28; in1  -= (sword32)t;
1422
    o = (in2  >> 28); in3  += o; t = o << 28; in2  -= (sword32)t;
1423
    o = (in3  >> 28); in4  += o; t = o << 28; in3  -= (sword32)t;
1424
    o = (in4  >> 28); in5  += o; t = o << 28; in4  -= (sword32)t;
1425
    o = (in5  >> 28); in6  += o; t = o << 28; in5  -= (sword32)t;
1426
    o = (in6  >> 28); in7  += o; t = o << 28; in6  -= (sword32)t;
1427
    o = (in7  >> 28); in8  += o; t = o << 28; in7  -= (sword32)t;
1428
    o = (in8  >> 28); in9  += o; t = o << 28; in8  -= (sword32)t;
1429
    o = (in9  >> 28); in10 += o; t = o << 28; in9  -= (sword32)t;
1430
    o = (in10 >> 28); in11 += o; t = o << 28; in10 -= (sword32)t;
1431
    o = (in11 >> 28); in12 += o; t = o << 28; in11 -= (sword32)t;
1432
    o = (in12 >> 28); in13 += o; t = o << 28; in12 -= (sword32)t;
1433
    o = (in13 >> 28); in14 += o; t = o << 28; in13 -= (sword32)t;
1434
    o = (in14 >> 28); in15 += o; t = o << 28; in14 -= (sword32)t;
1435
    o = (in15 >> 28); in0  += o;
1436
                    in8  += o; t = o << 28; in15 -= (sword32)t;
1437
1438
    /* Output as bytes */
1439
    b[ 0] = (in0  >>  0);
1440
    b[ 1] = (in0  >>  8);
1441
    b[ 2] = (in0  >> 16);
1442
    b[ 3] = (in0  >> 24) + ((in1  >>  0) <<  4);
1443
    b[ 4] = (in1  >>  4);
1444
    b[ 5] = (in1  >> 12);
1445
    b[ 6] = (in1  >> 20);
1446
    b[ 7] = (in2  >>  0);
1447
    b[ 8] = (in2  >>  8);
1448
    b[ 9] = (in2  >> 16);
1449
    b[10] = (in2  >> 24) + ((in3  >>  0) <<  4);
1450
    b[11] = (in3  >>  4);
1451
    b[12] = (in3  >> 12);
1452
    b[13] = (in3  >> 20);
1453
    b[14] = (in4  >>  0);
1454
    b[15] = (in4  >>  8);
1455
    b[16] = (in4  >> 16);
1456
    b[17] = (in4  >> 24) + ((in5  >>  0) <<  4);
1457
    b[18] = (in5  >>  4);
1458
    b[19] = (in5  >> 12);
1459
    b[20] = (in5  >> 20);
1460
    b[21] = (in6  >>  0);
1461
    b[22] = (in6  >>  8);
1462
    b[23] = (in6  >> 16);
1463
    b[24] = (in6  >> 24) + ((in7  >>  0) <<  4);
1464
    b[25] = (in7  >>  4);
1465
    b[26] = (in7  >> 12);
1466
    b[27] = (in7  >> 20);
1467
    b[28] = (in8  >>  0);
1468
    b[29] = (in8  >>  8);
1469
    b[30] = (in8  >> 16);
1470
    b[31] = (in8  >> 24) + ((in9  >>  0) <<  4);
1471
    b[32] = (in9  >>  4);
1472
    b[33] = (in9  >> 12);
1473
    b[34] = (in9  >> 20);
1474
    b[35] = (in10 >>  0);
1475
    b[36] = (in10 >>  8);
1476
    b[37] = (in10 >> 16);
1477
    b[38] = (in10 >> 24) + ((in11 >>  0) <<  4);
1478
    b[39] = (in11 >>  4);
1479
    b[40] = (in11 >> 12);
1480
    b[41] = (in11 >> 20);
1481
    b[42] = (in12 >>  0);
1482
    b[43] = (in12 >>  8);
1483
    b[44] = (in12 >> 16);
1484
    b[45] = (in12 >> 24) + ((in13 >>  0) <<  4);
1485
    b[46] = (in13 >>  4);
1486
    b[47] = (in13 >> 12);
1487
    b[48] = (in13 >> 20);
1488
    b[49] = (in14 >>  0);
1489
    b[50] = (in14 >>  8);
1490
    b[51] = (in14 >> 16);
1491
    b[52] = (in14 >> 24) + ((in15 >>  0) <<  4);
1492
    b[53] = (in15 >>  4);
1493
    b[54] = (in15 >> 12);
1494
    b[55] = (in15 >> 20);
1495
}
1496
1497
/* Set the field element to 0.
1498
 *
1499
 * a  [in]  Field element.
1500
 */
1501
void fe448_1(sword32* a)
1502
{
1503
    a[0] = 1;
1504
    a[1] = 0;
1505
    a[2] = 0;
1506
    a[3] = 0;
1507
    a[4] = 0;
1508
    a[5] = 0;
1509
    a[6] = 0;
1510
    a[7] = 0;
1511
    a[8] = 0;
1512
    a[9] = 0;
1513
    a[10] = 0;
1514
    a[11] = 0;
1515
    a[12] = 0;
1516
    a[13] = 0;
1517
    a[14] = 0;
1518
    a[15] = 0;
1519
}
1520
1521
/* Set the field element to 0.
1522
 *
1523
 * a  [in]  Field element.
1524
 */
1525
void fe448_0(sword32* a)
1526
{
1527
    a[0] = 0;
1528
    a[1] = 0;
1529
    a[2] = 0;
1530
    a[3] = 0;
1531
    a[4] = 0;
1532
    a[5] = 0;
1533
    a[6] = 0;
1534
    a[7] = 0;
1535
    a[8] = 0;
1536
    a[9] = 0;
1537
    a[10] = 0;
1538
    a[11] = 0;
1539
    a[12] = 0;
1540
    a[13] = 0;
1541
    a[14] = 0;
1542
    a[15] = 0;
1543
}
1544
1545
/* Copy one field element into another: d = a.
1546
 *
1547
 * d  [in]  Destination field element.
1548
 * a  [in]  Source field element.
1549
 */
1550
void fe448_copy(sword32* d, const sword32* a)
1551
{
1552
    d[0] = a[0];
1553
    d[1] = a[1];
1554
    d[2] = a[2];
1555
    d[3] = a[3];
1556
    d[4] = a[4];
1557
    d[5] = a[5];
1558
    d[6] = a[6];
1559
    d[7] = a[7];
1560
    d[8] = a[8];
1561
    d[9] = a[9];
1562
    d[10] = a[10];
1563
    d[11] = a[11];
1564
    d[12] = a[12];
1565
    d[13] = a[13];
1566
    d[14] = a[14];
1567
    d[15] = a[15];
1568
}
1569
1570
/* Conditionally swap the elements.
1571
 * Constant time implementation.
1572
 *
1573
 * a  [in]  First field element.
1574
 * b  [in]  Second field element.
1575
 * c  [in]  Swap when 1. Valid values: 0, 1.
1576
 */
1577
static void fe448_cswap(sword32* a, sword32* b, int c)
1578
{
1579
    sword32 mask = -(sword32)c;
1580
    sword32 t0 = (a[0] ^ b[0]) & mask;
1581
    sword32 t1 = (a[1] ^ b[1]) & mask;
1582
    sword32 t2 = (a[2] ^ b[2]) & mask;
1583
    sword32 t3 = (a[3] ^ b[3]) & mask;
1584
    sword32 t4 = (a[4] ^ b[4]) & mask;
1585
    sword32 t5 = (a[5] ^ b[5]) & mask;
1586
    sword32 t6 = (a[6] ^ b[6]) & mask;
1587
    sword32 t7 = (a[7] ^ b[7]) & mask;
1588
    sword32 t8 = (a[8] ^ b[8]) & mask;
1589
    sword32 t9 = (a[9] ^ b[9]) & mask;
1590
    sword32 t10 = (a[10] ^ b[10]) & mask;
1591
    sword32 t11 = (a[11] ^ b[11]) & mask;
1592
    sword32 t12 = (a[12] ^ b[12]) & mask;
1593
    sword32 t13 = (a[13] ^ b[13]) & mask;
1594
    sword32 t14 = (a[14] ^ b[14]) & mask;
1595
    sword32 t15 = (a[15] ^ b[15]) & mask;
1596
    a[0] ^= t0;
1597
    a[1] ^= t1;
1598
    a[2] ^= t2;
1599
    a[3] ^= t3;
1600
    a[4] ^= t4;
1601
    a[5] ^= t5;
1602
    a[6] ^= t6;
1603
    a[7] ^= t7;
1604
    a[8] ^= t8;
1605
    a[9] ^= t9;
1606
    a[10] ^= t10;
1607
    a[11] ^= t11;
1608
    a[12] ^= t12;
1609
    a[13] ^= t13;
1610
    a[14] ^= t14;
1611
    a[15] ^= t15;
1612
    b[0] ^= t0;
1613
    b[1] ^= t1;
1614
    b[2] ^= t2;
1615
    b[3] ^= t3;
1616
    b[4] ^= t4;
1617
    b[5] ^= t5;
1618
    b[6] ^= t6;
1619
    b[7] ^= t7;
1620
    b[8] ^= t8;
1621
    b[9] ^= t9;
1622
    b[10] ^= t10;
1623
    b[11] ^= t11;
1624
    b[12] ^= t12;
1625
    b[13] ^= t13;
1626
    b[14] ^= t14;
1627
    b[15] ^= t15;
1628
}
1629
1630
/* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
1631
 *
1632
 * r  [in]  Field element to hold sum.
1633
 * a  [in]  Field element to add.
1634
 * b  [in]  Field element to add.
1635
 */
1636
void fe448_add(sword32* r, const sword32* a, const sword32* b)
1637
{
1638
    r[0] = a[0] + b[0];
1639
    r[1] = a[1] + b[1];
1640
    r[2] = a[2] + b[2];
1641
    r[3] = a[3] + b[3];
1642
    r[4] = a[4] + b[4];
1643
    r[5] = a[5] + b[5];
1644
    r[6] = a[6] + b[6];
1645
    r[7] = a[7] + b[7];
1646
    r[8] = a[8] + b[8];
1647
    r[9] = a[9] + b[9];
1648
    r[10] = a[10] + b[10];
1649
    r[11] = a[11] + b[11];
1650
    r[12] = a[12] + b[12];
1651
    r[13] = a[13] + b[13];
1652
    r[14] = a[14] + b[14];
1653
    r[15] = a[15] + b[15];
1654
}
1655
1656
/* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
1657
 *
1658
 * r  [in]  Field element to hold difference.
1659
 * a  [in]  Field element to subtract from.
1660
 * b  [in]  Field element to subtract.
1661
 */
1662
void fe448_sub(sword32* r, const sword32* a, const sword32* b)
1663
{
1664
    r[0] = a[0] - b[0];
1665
    r[1] = a[1] - b[1];
1666
    r[2] = a[2] - b[2];
1667
    r[3] = a[3] - b[3];
1668
    r[4] = a[4] - b[4];
1669
    r[5] = a[5] - b[5];
1670
    r[6] = a[6] - b[6];
1671
    r[7] = a[7] - b[7];
1672
    r[8] = a[8] - b[8];
1673
    r[9] = a[9] - b[9];
1674
    r[10] = a[10] - b[10];
1675
    r[11] = a[11] - b[11];
1676
    r[12] = a[12] - b[12];
1677
    r[13] = a[13] - b[13];
1678
    r[14] = a[14] - b[14];
1679
    r[15] = a[15] - b[15];
1680
}
1681
1682
void fe448_reduce(sword32* a)
1683
{
1684
    sword64 o;
1685
1686
    o = a[0 ] >> 28; a[1 ] += (sword32)o; a[0 ] -= (sword32)(o << 28);
1687
    o = a[1 ] >> 28; a[2 ] += (sword32)o; a[1 ] -= (sword32)(o << 28);
1688
    o = a[2 ] >> 28; a[3 ] += (sword32)o; a[2 ] -= (sword32)(o << 28);
1689
    o = a[3 ] >> 28; a[4 ] += (sword32)o; a[3 ] -= (sword32)(o << 28);
1690
    o = a[4 ] >> 28; a[5 ] += (sword32)o; a[4 ] -= (sword32)(o << 28);
1691
    o = a[5 ] >> 28; a[6 ] += (sword32)o; a[5 ] -= (sword32)(o << 28);
1692
    o = a[6 ] >> 28; a[7 ] += (sword32)o; a[6 ] -= (sword32)(o << 28);
1693
    o = a[7 ] >> 28; a[8 ] += (sword32)o; a[7 ] -= (sword32)(o << 28);
1694
    o = a[8 ] >> 28; a[9 ] += (sword32)o; a[8 ] -= (sword32)(o << 28);
1695
    o = a[9 ] >> 28; a[10] += (sword32)o; a[9 ] -= (sword32)(o << 28);
1696
    o = a[10] >> 28; a[11] += (sword32)o; a[10] -= (sword32)(o << 28);
1697
    o = a[11] >> 28; a[12] += (sword32)o; a[11] -= (sword32)(o << 28);
1698
    o = a[12] >> 28; a[13] += (sword32)o; a[12] -= (sword32)(o << 28);
1699
    o = a[13] >> 28; a[14] += (sword32)o; a[13] -= (sword32)(o << 28);
1700
    o = a[14] >> 28; a[15] += (sword32)o; a[14] -= (sword32)(o << 28);
1701
    o = a[15] >> 28; a[0]  += (sword32)o;
1702
                     a[8]  += (sword32)o; a[15] -= (sword32)(o << 28);
1703
}
1704
/* Mulitply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
1705
 *
1706
 * r  [in]  Field element to hold result.
1707
 * a  [in]  Field element to multiply.
1708
 */
1709
void fe448_mul39081(sword32* r, const sword32* a)
1710
{
1711
    sword64 t;
1712
    sword32 o;
1713
    sword64 t0 = a[0] * (sword64)39081;
1714
    sword64 t1 = a[1] * (sword64)39081;
1715
    sword64 t2 = a[2] * (sword64)39081;
1716
    sword64 t3 = a[3] * (sword64)39081;
1717
    sword64 t4 = a[4] * (sword64)39081;
1718
    sword64 t5 = a[5] * (sword64)39081;
1719
    sword64 t6 = a[6] * (sword64)39081;
1720
    sword64 t7 = a[7] * (sword64)39081;
1721
    sword64 t8 = a[8] * (sword64)39081;
1722
    sword64 t9 = a[9] * (sword64)39081;
1723
    sword64 t10 = a[10] * (sword64)39081;
1724
    sword64 t11 = a[11] * (sword64)39081;
1725
    sword64 t12 = a[12] * (sword64)39081;
1726
    sword64 t13 = a[13] * (sword64)39081;
1727
    sword64 t14 = a[14] * (sword64)39081;
1728
    sword64 t15 = a[15] * (sword64)39081;
1729
    o = (sword32)(t0  >> 28); t1  += o; t = (sword64)o << 28; t0  -= t;
1730
    o = (sword32)(t1  >> 28); t2  += o; t = (sword64)o << 28; t1  -= t;
1731
    o = (sword32)(t2  >> 28); t3  += o; t = (sword64)o << 28; t2  -= t;
1732
    o = (sword32)(t3  >> 28); t4  += o; t = (sword64)o << 28; t3  -= t;
1733
    o = (sword32)(t4  >> 28); t5  += o; t = (sword64)o << 28; t4  -= t;
1734
    o = (sword32)(t5  >> 28); t6  += o; t = (sword64)o << 28; t5  -= t;
1735
    o = (sword32)(t6  >> 28); t7  += o; t = (sword64)o << 28; t6  -= t;
1736
    o = (sword32)(t7  >> 28); t8  += o; t = (sword64)o << 28; t7  -= t;
1737
    o = (sword32)(t8  >> 28); t9  += o; t = (sword64)o << 28; t8  -= t;
1738
    o = (sword32)(t9  >> 28); t10 += o; t = (sword64)o << 28; t9  -= t;
1739
    o = (sword32)(t10 >> 28); t11 += o; t = (sword64)o << 28; t10 -= t;
1740
    o = (sword32)(t11 >> 28); t12 += o; t = (sword64)o << 28; t11 -= t;
1741
    o = (sword32)(t12 >> 28); t13 += o; t = (sword64)o << 28; t12 -= t;
1742
    o = (sword32)(t13 >> 28); t14 += o; t = (sword64)o << 28; t13 -= t;
1743
    o = (sword32)(t14 >> 28); t15 += o; t = (sword64)o << 28; t14 -= t;
1744
    o = (sword32)(t15 >> 28); t0  += o;
1745
                   t8  += o; t = (sword64)o << 28; t15 -= t;
1746
1747
    /* Store */
1748
    r[0] = (sword32)t0;
1749
    r[1] = (sword32)t1;
1750
    r[2] = (sword32)t2;
1751
    r[3] = (sword32)t3;
1752
    r[4] = (sword32)t4;
1753
    r[5] = (sword32)t5;
1754
    r[6] = (sword32)t6;
1755
    r[7] = (sword32)t7;
1756
    r[8] = (sword32)t8;
1757
    r[9] = (sword32)t9;
1758
    r[10] = (sword32)t10;
1759
    r[11] = (sword32)t11;
1760
    r[12] = (sword32)t12;
1761
    r[13] = (sword32)t13;
1762
    r[14] = (sword32)t14;
1763
    r[15] = (sword32)t15;
1764
}
1765
1766
/* Mulitply two field elements. r = a * b
1767
 *
1768
 * r  [in]  Field element to hold result.
1769
 * a  [in]  Field element to multiply.
1770
 * b  [in]  Field element to multiply.
1771
 */
1772
static WC_INLINE void fe448_mul_8(sword32* r, const sword32* a, const sword32* b)
1773
{
1774
    sword64 t;
1775
    sword64 t0   = (sword64)a[ 0] * b[ 0];
1776
    sword64 t1   = (sword64)a[ 0] * b[ 1];
1777
    sword64 t101 = (sword64)a[ 1] * b[ 0];
1778
    sword64 t2   = (sword64)a[ 0] * b[ 2];
1779
    sword64 t102 = (sword64)a[ 1] * b[ 1];
1780
    sword64 t202 = (sword64)a[ 2] * b[ 0];
1781
    sword64 t3   = (sword64)a[ 0] * b[ 3];
1782
    sword64 t103 = (sword64)a[ 1] * b[ 2];
1783
    sword64 t203 = (sword64)a[ 2] * b[ 1];
1784
    sword64 t303 = (sword64)a[ 3] * b[ 0];
1785
    sword64 t4   = (sword64)a[ 0] * b[ 4];
1786
    sword64 t104 = (sword64)a[ 1] * b[ 3];
1787
    sword64 t204 = (sword64)a[ 2] * b[ 2];
1788
    sword64 t304 = (sword64)a[ 3] * b[ 1];
1789
    sword64 t404 = (sword64)a[ 4] * b[ 0];
1790
    sword64 t5   = (sword64)a[ 0] * b[ 5];
1791
    sword64 t105 = (sword64)a[ 1] * b[ 4];
1792
    sword64 t205 = (sword64)a[ 2] * b[ 3];
1793
    sword64 t305 = (sword64)a[ 3] * b[ 2];
1794
    sword64 t405 = (sword64)a[ 4] * b[ 1];
1795
    sword64 t505 = (sword64)a[ 5] * b[ 0];
1796
    sword64 t6   = (sword64)a[ 0] * b[ 6];
1797
    sword64 t106 = (sword64)a[ 1] * b[ 5];
1798
    sword64 t206 = (sword64)a[ 2] * b[ 4];
1799
    sword64 t306 = (sword64)a[ 3] * b[ 3];
1800
    sword64 t406 = (sword64)a[ 4] * b[ 2];
1801
    sword64 t506 = (sword64)a[ 5] * b[ 1];
1802
    sword64 t606 = (sword64)a[ 6] * b[ 0];
1803
    sword64 t7   = (sword64)a[ 0] * b[ 7];
1804
    sword64 t107 = (sword64)a[ 1] * b[ 6];
1805
    sword64 t207 = (sword64)a[ 2] * b[ 5];
1806
    sword64 t307 = (sword64)a[ 3] * b[ 4];
1807
    sword64 t407 = (sword64)a[ 4] * b[ 3];
1808
    sword64 t507 = (sword64)a[ 5] * b[ 2];
1809
    sword64 t607 = (sword64)a[ 6] * b[ 1];
1810
    sword64 t707 = (sword64)a[ 7] * b[ 0];
1811
    sword64 t8   = (sword64)a[ 1] * b[ 7];
1812
    sword64 t108 = (sword64)a[ 2] * b[ 6];
1813
    sword64 t208 = (sword64)a[ 3] * b[ 5];
1814
    sword64 t308 = (sword64)a[ 4] * b[ 4];
1815
    sword64 t408 = (sword64)a[ 5] * b[ 3];
1816
    sword64 t508 = (sword64)a[ 6] * b[ 2];
1817
    sword64 t608 = (sword64)a[ 7] * b[ 1];
1818
    sword64 t9   = (sword64)a[ 2] * b[ 7];
1819
    sword64 t109 = (sword64)a[ 3] * b[ 6];
1820
    sword64 t209 = (sword64)a[ 4] * b[ 5];
1821
    sword64 t309 = (sword64)a[ 5] * b[ 4];
1822
    sword64 t409 = (sword64)a[ 6] * b[ 3];
1823
    sword64 t509 = (sword64)a[ 7] * b[ 2];
1824
    sword64 t10  = (sword64)a[ 3] * b[ 7];
1825
    sword64 t110 = (sword64)a[ 4] * b[ 6];
1826
    sword64 t210 = (sword64)a[ 5] * b[ 5];
1827
    sword64 t310 = (sword64)a[ 6] * b[ 4];
1828
    sword64 t410 = (sword64)a[ 7] * b[ 3];
1829
    sword64 t11  = (sword64)a[ 4] * b[ 7];
1830
    sword64 t111 = (sword64)a[ 5] * b[ 6];
1831
    sword64 t211 = (sword64)a[ 6] * b[ 5];
1832
    sword64 t311 = (sword64)a[ 7] * b[ 4];
1833
    sword64 t12  = (sword64)a[ 5] * b[ 7];
1834
    sword64 t112 = (sword64)a[ 6] * b[ 6];
1835
    sword64 t212 = (sword64)a[ 7] * b[ 5];
1836
    sword64 t13  = (sword64)a[ 6] * b[ 7];
1837
    sword64 t113 = (sword64)a[ 7] * b[ 6];
1838
    sword64 t14  = (sword64)a[ 7] * b[ 7];
1839
    t1  += t101;
1840
    t2  += t102; t2  += t202;
1841
    t3  += t103; t3  += t203; t3  += t303;
1842
    t4  += t104; t4  += t204; t4  += t304; t4  += t404;
1843
    t5  += t105; t5  += t205; t5  += t305; t5  += t405; t5  += t505;
1844
    t6  += t106; t6  += t206; t6  += t306; t6  += t406; t6  += t506;
1845
    t6  += t606;
1846
    t7  += t107; t7  += t207; t7  += t307; t7  += t407; t7  += t507;
1847
    t7  += t607;
1848
    t7  += t707;
1849
    t8  += t108; t8  += t208; t8  += t308; t8  += t408; t8  += t508;
1850
    t8  += t608;
1851
    t9  += t109; t9  += t209; t9  += t309; t9  += t409; t9  += t509;
1852
    t10 += t110; t10 += t210; t10 += t310; t10 += t410;
1853
    t11 += t111; t11 += t211; t11 += t311;
1854
    t12 += t112; t12 += t212;
1855
    t13 += t113;
1856
    sword64 o = t14 >> 28;
1857
    sword64 t15 = o;
1858
    t14 -= o << 28;
1859
    o = (t0  >> 28); t1  += o; t = o << 28; t0  -= t;
1860
    o = (t1  >> 28); t2  += o; t = o << 28; t1  -= t;
1861
    o = (t2  >> 28); t3  += o; t = o << 28; t2  -= t;
1862
    o = (t3  >> 28); t4  += o; t = o << 28; t3  -= t;
1863
    o = (t4  >> 28); t5  += o; t = o << 28; t4  -= t;
1864
    o = (t5  >> 28); t6  += o; t = o << 28; t5  -= t;
1865
    o = (t6  >> 28); t7  += o; t = o << 28; t6  -= t;
1866
    o = (t7  >> 28); t8  += o; t = o << 28; t7  -= t;
1867
    o = (t8  >> 28); t9  += o; t = o << 28; t8  -= t;
1868
    o = (t9  >> 28); t10 += o; t = o << 28; t9  -= t;
1869
    o = (t10 >> 28); t11 += o; t = o << 28; t10 -= t;
1870
    o = (t11 >> 28); t12 += o; t = o << 28; t11 -= t;
1871
    o = (t12 >> 28); t13 += o; t = o << 28; t12 -= t;
1872
    o = (t13 >> 28); t14 += o; t = o << 28; t13 -= t;
1873
    o = (t14 >> 28); t15 += o; t = o << 28; t14 -= t;
1874
    o = (t15 >> 28); t0  += o;
1875
                   t8  += o; t = o << 28; t15 -= t;
1876
1877
    /* Store */
1878
    r[0] = (sword32)t0;
1879
    r[1] = (sword32)t1;
1880
    r[2] = (sword32)t2;
1881
    r[3] = (sword32)t3;
1882
    r[4] = (sword32)t4;
1883
    r[5] = (sword32)t5;
1884
    r[6] = (sword32)t6;
1885
    r[7] = (sword32)t7;
1886
    r[8] = (sword32)t8;
1887
    r[9] = (sword32)t9;
1888
    r[10] = (sword32)t10;
1889
    r[11] = (sword32)t11;
1890
    r[12] = (sword32)t12;
1891
    r[13] = (sword32)t13;
1892
    r[14] = (sword32)t14;
1893
    r[15] = (sword32)t15;
1894
}
1895
1896
/* Mulitply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
1897
 *
1898
 * r  [in]  Field element to hold result.
1899
 * a  [in]  Field element to multiply.
1900
 * b  [in]  Field element to multiply.
1901
 */
1902
void fe448_mul(sword32* r, const sword32* a, const sword32* b)
1903
{
1904
    sword32 r0[16];
1905
    sword32 r1[16];
1906
    sword32* a1 = r1;
1907
    sword32 b1[8];
1908
    sword32 r2[16];
1909
    a1[0] = a[0] + a[8];
1910
    a1[1] = a[1] + a[9];
1911
    a1[2] = a[2] + a[10];
1912
    a1[3] = a[3] + a[11];
1913
    a1[4] = a[4] + a[12];
1914
    a1[5] = a[5] + a[13];
1915
    a1[6] = a[6] + a[14];
1916
    a1[7] = a[7] + a[15];
1917
    b1[0] = b[0] + b[8];
1918
    b1[1] = b[1] + b[9];
1919
    b1[2] = b[2] + b[10];
1920
    b1[3] = b[3] + b[11];
1921
    b1[4] = b[4] + b[12];
1922
    b1[5] = b[5] + b[13];
1923
    b1[6] = b[6] + b[14];
1924
    b1[7] = b[7] + b[15];
1925
    fe448_mul_8(r2, a + 8, b + 8);
1926
    fe448_mul_8(r0, a, b);
1927
    fe448_mul_8(r1, a1, b1);
1928
    r[ 0] = r0[ 0] + r2[ 0] + r1[ 8] - r0[ 8];
1929
    r[ 1] = r0[ 1] + r2[ 1] + r1[ 9] - r0[ 9];
1930
    r[ 2] = r0[ 2] + r2[ 2] + r1[10] - r0[10];
1931
    r[ 3] = r0[ 3] + r2[ 3] + r1[11] - r0[11];
1932
    r[ 4] = r0[ 4] + r2[ 4] + r1[12] - r0[12];
1933
    r[ 5] = r0[ 5] + r2[ 5] + r1[13] - r0[13];
1934
    r[ 6] = r0[ 6] + r2[ 6] + r1[14] - r0[14];
1935
    r[ 7] = r0[ 7] + r2[ 7] + r1[15] - r0[15];
1936
    r[ 8] = r2[ 8]          + r1[ 0] - r0[ 0] + r1[ 8];
1937
    r[ 9] = r2[ 9]          + r1[ 1] - r0[ 1] + r1[ 9];
1938
    r[10] = r2[10]          + r1[ 2] - r0[ 2] + r1[10];
1939
    r[11] = r2[11]          + r1[ 3] - r0[ 3] + r1[11];
1940
    r[12] = r2[12]          + r1[ 4] - r0[ 4] + r1[12];
1941
    r[13] = r2[13]          + r1[ 5] - r0[ 5] + r1[13];
1942
    r[14] = r2[14]          + r1[ 6] - r0[ 6] + r1[14];
1943
    r[15] = r2[15]          + r1[ 7] - r0[ 7] + r1[15];
1944
}
1945
1946
/* Square a field element. r = a * a
1947
 *
1948
 * r  [in]  Field element to hold result.
1949
 * a  [in]  Field element to square.
1950
 */
1951
static WC_INLINE void fe448_sqr_8(sword32* r, const sword32* a)
1952
{
1953
    sword64 o;
1954
    sword64 t15;
1955
    sword64 t;
1956
    sword64 t0   =     (sword64)a[ 0] * a[ 0];
1957
    sword64 t1   = 2 * (sword64)a[ 0] * a[ 1];
1958
    sword64 t2   = 2 * (sword64)a[ 0] * a[ 2];
1959
    sword64 t102 =     (sword64)a[ 1] * a[ 1];
1960
    sword64 t3   = 2 * (sword64)a[ 0] * a[ 3];
1961
    sword64 t103 = 2 * (sword64)a[ 1] * a[ 2];
1962
    sword64 t4   = 2 * (sword64)a[ 0] * a[ 4];
1963
    sword64 t104 = 2 * (sword64)a[ 1] * a[ 3];
1964
    sword64 t204 =     (sword64)a[ 2] * a[ 2];
1965
    sword64 t5   = 2 * (sword64)a[ 0] * a[ 5];
1966
    sword64 t105 = 2 * (sword64)a[ 1] * a[ 4];
1967
    sword64 t205 = 2 * (sword64)a[ 2] * a[ 3];
1968
    sword64 t6   = 2 * (sword64)a[ 0] * a[ 6];
1969
    sword64 t106 = 2 * (sword64)a[ 1] * a[ 5];
1970
    sword64 t206 = 2 * (sword64)a[ 2] * a[ 4];
1971
    sword64 t306 =     (sword64)a[ 3] * a[ 3];
1972
    sword64 t7   = 2 * (sword64)a[ 0] * a[ 7];
1973
    sword64 t107 = 2 * (sword64)a[ 1] * a[ 6];
1974
    sword64 t207 = 2 * (sword64)a[ 2] * a[ 5];
1975
    sword64 t307 = 2 * (sword64)a[ 3] * a[ 4];
1976
    sword64 t8   = 2 * (sword64)a[ 1] * a[ 7];
1977
    sword64 t108 = 2 * (sword64)a[ 2] * a[ 6];
1978
    sword64 t208 = 2 * (sword64)a[ 3] * a[ 5];
1979
    sword64 t308 =     (sword64)a[ 4] * a[ 4];
1980
    sword64 t9   = 2 * (sword64)a[ 2] * a[ 7];
1981
    sword64 t109 = 2 * (sword64)a[ 3] * a[ 6];
1982
    sword64 t209 = 2 * (sword64)a[ 4] * a[ 5];
1983
    sword64 t10  = 2 * (sword64)a[ 3] * a[ 7];
1984
    sword64 t110 = 2 * (sword64)a[ 4] * a[ 6];
1985
    sword64 t210 =     (sword64)a[ 5] * a[ 5];
1986
    sword64 t11  = 2 * (sword64)a[ 4] * a[ 7];
1987
    sword64 t111 = 2 * (sword64)a[ 5] * a[ 6];
1988
    sword64 t12  = 2 * (sword64)a[ 5] * a[ 7];
1989
    sword64 t112 =     (sword64)a[ 6] * a[ 6];
1990
    sword64 t13  = 2 * (sword64)a[ 6] * a[ 7];
1991
    sword64 t14  =     (sword64)a[ 7] * a[ 7];
1992
    t2  += t102;
1993
    t3  += t103;
1994
    t4  += t104; t4  += t204;
1995
    t5  += t105; t5  += t205;
1996
    t6  += t106; t6  += t206; t6  += t306;
1997
    t7  += t107; t7  += t207; t7  += t307;
1998
    t8  += t108; t8  += t208; t8  += t308;
1999
    t9  += t109; t9  += t209;
2000
    t10 += t110; t10 += t210;
2001
    t11 += t111;
2002
    t12 += t112;
2003
    o = t14 >> 28;
2004
    t15 = o;
2005
    t14 -= o << 28;
2006
    o = (t0  >> 28); t1  += o; t = o << 28; t0  -= t;
2007
    o = (t1  >> 28); t2  += o; t = o << 28; t1  -= t;
2008
    o = (t2  >> 28); t3  += o; t = o << 28; t2  -= t;
2009
    o = (t3  >> 28); t4  += o; t = o << 28; t3  -= t;
2010
    o = (t4  >> 28); t5  += o; t = o << 28; t4  -= t;
2011
    o = (t5  >> 28); t6  += o; t = o << 28; t5  -= t;
2012
    o = (t6  >> 28); t7  += o; t = o << 28; t6  -= t;
2013
    o = (t7  >> 28); t8  += o; t = o << 28; t7  -= t;
2014
    o = (t8  >> 28); t9  += o; t = o << 28; t8  -= t;
2015
    o = (t9  >> 28); t10 += o; t = o << 28; t9  -= t;
2016
    o = (t10 >> 28); t11 += o; t = o << 28; t10 -= t;
2017
    o = (t11 >> 28); t12 += o; t = o << 28; t11 -= t;
2018
    o = (t12 >> 28); t13 += o; t = o << 28; t12 -= t;
2019
    o = (t13 >> 28); t14 += o; t = o << 28; t13 -= t;
2020
    o = (t14 >> 28); t15 += o; t = o << 28; t14 -= t;
2021
    o = (t15 >> 28); t0  += o;
2022
                   t8  += o; t = o << 28; t15 -= t;
2023
2024
    /* Store */
2025
    r[0] = (sword32)t0;
2026
    r[1] = (sword32)t1;
2027
    r[2] = (sword32)t2;
2028
    r[3] = (sword32)t3;
2029
    r[4] = (sword32)t4;
2030
    r[5] = (sword32)t5;
2031
    r[6] = (sword32)t6;
2032
    r[7] = (sword32)t7;
2033
    r[8] = (sword32)t8;
2034
    r[9] = (sword32)t9;
2035
    r[10] = (sword32)t10;
2036
    r[11] = (sword32)t11;
2037
    r[12] = (sword32)t12;
2038
    r[13] = (sword32)t13;
2039
    r[14] = (sword32)t14;
2040
    r[15] = (sword32)t15;
2041
}
2042
2043
/* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
2044
 *
2045
 * r  [in]  Field element to hold result.
2046
 * a  [in]  Field element to square.
2047
 */
2048
void fe448_sqr(sword32* r, const sword32* a)
2049
{
2050
    sword32 r0[16];
2051
    sword32 r1[16];
2052
    sword32* a1 = r1;
2053
    sword32 r2[16];
2054
    a1[0] = a[0] + a[8];
2055
    a1[1] = a[1] + a[9];
2056
    a1[2] = a[2] + a[10];
2057
    a1[3] = a[3] + a[11];
2058
    a1[4] = a[4] + a[12];
2059
    a1[5] = a[5] + a[13];
2060
    a1[6] = a[6] + a[14];
2061
    a1[7] = a[7] + a[15];
2062
    fe448_sqr_8(r2, a + 8);
2063
    fe448_sqr_8(r0, a);
2064
    fe448_sqr_8(r1, a1);
2065
    r[ 0] = r0[ 0] + r2[ 0] + r1[ 8] - r0[ 8];
2066
    r[ 1] = r0[ 1] + r2[ 1] + r1[ 9] - r0[ 9];
2067
    r[ 2] = r0[ 2] + r2[ 2] + r1[10] - r0[10];
2068
    r[ 3] = r0[ 3] + r2[ 3] + r1[11] - r0[11];
2069
    r[ 4] = r0[ 4] + r2[ 4] + r1[12] - r0[12];
2070
    r[ 5] = r0[ 5] + r2[ 5] + r1[13] - r0[13];
2071
    r[ 6] = r0[ 6] + r2[ 6] + r1[14] - r0[14];
2072
    r[ 7] = r0[ 7] + r2[ 7] + r1[15] - r0[15];
2073
    r[ 8] = r2[ 8]          + r1[ 0] - r0[ 0] + r1[ 8];
2074
    r[ 9] = r2[ 9]          + r1[ 1] - r0[ 1] + r1[ 9];
2075
    r[10] = r2[10]          + r1[ 2] - r0[ 2] + r1[10];
2076
    r[11] = r2[11]          + r1[ 3] - r0[ 3] + r1[11];
2077
    r[12] = r2[12]          + r1[ 4] - r0[ 4] + r1[12];
2078
    r[13] = r2[13]          + r1[ 5] - r0[ 5] + r1[13];
2079
    r[14] = r2[14]          + r1[ 6] - r0[ 6] + r1[14];
2080
    r[15] = r2[15]          + r1[ 7] - r0[ 7] + r1[15];
2081
}
2082
2083
/* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
2084
 * Constant time implementation - using Fermat's little theorem:
2085
 *   a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
2086
 * For curve448: p - 2 = 2^448 - 2^224 - 3
2087
 *
2088
 * r  [in]  Field element to hold result.
2089
 * a  [in]  Field element to invert.
2090
 */
2091
void fe448_invert(sword32* r, const sword32* a)
2092
{
2093
    sword32 t1[16];
2094
    sword32 t2[16];
2095
    sword32 t3[16];
2096
    sword32 t4[16];
2097
    int i;
2098
2099
    fe448_sqr(t1, a);
2100
    /* t1 = 2 */
2101
    fe448_mul(t1, t1, a);
2102
    /* t1 = 3 */
2103
    fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
2104
    /* t2 = c */
2105
    fe448_mul(t3, t2, a);
2106
    /* t3 = d */
2107
    fe448_mul(t1, t2, t1);
2108
    /* t1 = f */
2109
    fe448_sqr(t2, t1);
2110
    /* t2 = 1e */
2111
    fe448_mul(t4, t2, a);
2112
    /* t4 = 1f */
2113
    fe448_sqr(t2, t4); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
2114
    /* t2 = 3e0 */
2115
    fe448_mul(t1, t2, t4);
2116
    /* t1 = 3ff */
2117
    fe448_sqr(t2, t1); for (i = 1; i < 10; ++i) fe448_sqr(t2, t2);
2118
    /* t2 = ffc00 */
2119
    fe448_mul(t1, t2, t1);
2120
    /* t1 = fffff */
2121
    fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
2122
    /* t2 = 1ffffe0 */
2123
    fe448_mul(t1, t2, t4);
2124
    /* t1 = 1ffffff */
2125
    fe448_sqr(t2, t1); for (i = 1; i < 25; ++i) fe448_sqr(t2, t2);
2126
    /* t2 = 3fffffe000000 */
2127
    fe448_mul(t1, t2, t1);
2128
    /* t1 = 3ffffffffffff */
2129
    fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
2130
    /* t2 = 7fffffffffffe0 */
2131
    fe448_mul(t1, t2, t4);
2132
    /* t1 = 7fffffffffffff */
2133
    fe448_sqr(t2, t1); for (i = 1; i < 55; ++i) fe448_sqr(t2, t2);
2134
    /* t2 = 3fffffffffffff80000000000000 */
2135
    fe448_mul(t1, t2, t1);
2136
    /* t1 = 3fffffffffffffffffffffffffff */
2137
    fe448_sqr(t2, t1); for (i = 1; i < 110; ++i) fe448_sqr(t2, t2);
2138
    /* t2 = fffffffffffffffffffffffffffc000000000000000000000000000 */
2139
    fe448_mul(t1, t2, t1);
2140
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff */
2141
    fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
2142
    /* t2 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff0 */
2143
    fe448_mul(t3, t3, t2);
2144
    /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
2145
    fe448_mul(t1, t3, a);
2146
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
2147
    fe448_sqr(t1, t1); for (i = 1; i < 224; ++i) fe448_sqr(t1, t1);
2148
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000000000000000000000000000000000 */
2149
    fe448_mul(r, t3, t1);
2150
    /* r = fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
2151
}
2152
2153
/* Scalar multiply the point by a number. r = n.a
2154
 * Uses Montgomery ladder and only requires the x-ordinate.
2155
 *
2156
 * r  [in]  Field element to hold result.
2157
 * n  [in]  Scalar as an array of bytes.
2158
 * a  [in]  Point to multiply - x-ordinate only.
2159
 */
2160
int curve448(byte* r, const byte* n, const byte* a)
2161
{
2162
    sword32 x1[16];
2163
    sword32 x2[16];
2164
    sword32 z2[16];
2165
    sword32 x3[16];
2166
    sword32 z3[16];
2167
    sword32 t0[16];
2168
    sword32 t1[16];
2169
    int i;
2170
    unsigned int swap;
2171
    unsigned int b;
2172
2173
    fe448_from_bytes(x1, a);
2174
    fe448_1(x2);
2175
    fe448_0(z2);
2176
    fe448_copy(x3, x1);
2177
    fe448_1(z3);
2178
2179
    swap = 0;
2180
    for (i = 447; i >= 0; --i) {
2181
        b = (n[i >> 3] >> (i & 7)) & 1;
2182
        swap ^= b;
2183
        fe448_cswap(x2, x3, swap);
2184
        fe448_cswap(z2, z3, swap);
2185
        swap = b;
2186
2187
        /* Montgomery Ladder - double and add */
2188
        fe448_add(t0, x2, z2);
2189
        fe448_reduce(t0);
2190
        fe448_add(t1, x3, z3);
2191
        fe448_reduce(t1);
2192
        fe448_sub(x2, x2, z2);
2193
        fe448_sub(x3, x3, z3);
2194
        fe448_mul(t1, t1, x2);
2195
        fe448_mul(z3, x3, t0);
2196
        fe448_sqr(t0, t0);
2197
        fe448_sqr(x2, x2);
2198
        fe448_add(x3, z3, t1);
2199
        fe448_reduce(x3);
2200
        fe448_sqr(x3, x3);
2201
        fe448_sub(z3, z3, t1);
2202
        fe448_sqr(z3, z3);
2203
        fe448_mul(z3, z3, x1);
2204
        fe448_sub(t1, t0, x2);
2205
        fe448_mul(x2, t0, x2);
2206
        fe448_mul39081(z2, t1);
2207
        fe448_add(z2, t0, z2);
2208
        fe448_mul(z2, z2, t1);
2209
    }
2210
    /* Last two bits are 0 - no final swap check required. */
2211
2212
    fe448_invert(z2, z2);
2213
    fe448_mul(x2, x2, z2);
2214
    fe448_to_bytes(r, x2);
2215
2216
    return 0;
2217
}
2218
2219
#ifdef HAVE_ED448
2220
/* Check whether field element is not 0.
2221
 * Must convert to a normalized form before checking.
2222
 *
2223
 * a  [in]  Field element.
2224
 * returns 0 when zero, and any other value otherwise.
2225
 */
2226
int fe448_isnonzero(const sword32* a)
2227
{
2228
    byte b[56];
2229
    int i;
2230
    byte c = 0;
2231
    fe448_to_bytes(b, a);
2232
    for (i = 0; i < 56; i++)
2233
        c |= b[i];
2234
    return c;
2235
}
2236
2237
/* Check whether field element is negative.
2238
 * Must convert to a normalized form before checking.
2239
 *
2240
 * a  [in]  Field element.
2241
 * returns 1 when negative, and 0 otherwise.
2242
 */
2243
int fe448_isnegative(const sword32* a)
2244
{
2245
    byte b[56];
2246
    fe448_to_bytes(b, a);
2247
    return b[0] & 1;
2248
}
2249
2250
/* Negates the field element. r = -a
2251
 *
2252
 * r  [in]  Field element to hold result.
2253
 * a  [in]  Field element.
2254
 */
2255
void fe448_neg(sword32* r, const sword32* a)
2256
{
2257
    r[0] = -a[0];
2258
    r[1] = -a[1];
2259
    r[2] = -a[2];
2260
    r[3] = -a[3];
2261
    r[4] = -a[4];
2262
    r[5] = -a[5];
2263
    r[6] = -a[6];
2264
    r[7] = -a[7];
2265
    r[8] = -a[8];
2266
    r[9] = -a[9];
2267
    r[10] = -a[10];
2268
    r[11] = -a[11];
2269
    r[12] = -a[12];
2270
    r[13] = -a[13];
2271
    r[14] = -a[14];
2272
    r[15] = -a[15];
2273
}
2274
2275
/* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
2276
 * Used for calcualting y-ordinate from x-ordinate for Ed448.
2277
 *
2278
 * r  [in]  Field element to hold result.
2279
 * a  [in]  Field element to exponentiate.
2280
 */
2281
void fe448_pow_2_446_222_1(sword32* r, const sword32* a)
2282
{
2283
    sword32 t1[16];
2284
    sword32 t2[16];
2285
    sword32 t3[16];
2286
    sword32 t4[16];
2287
    sword32 t5[16];
2288
    int i;
2289
2290
    fe448_sqr(t3, a);
2291
    /* t3 = 2 */
2292
    fe448_mul(t1, t3, a);
2293
    /* t1 = 3 */
2294
    fe448_sqr(t5, t1);
2295
    /* t5 = 6 */
2296
    fe448_mul(t5, t5, a);
2297
    /* t5 = 7 */
2298
    fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
2299
    /* t2 = c */
2300
    fe448_mul(t3, t2, t3);
2301
    /* t3 = e */
2302
    fe448_mul(t1, t2, t1);
2303
    /* t1 = f */
2304
    fe448_sqr(t2, t1); for (i = 1; i < 3; ++i) fe448_sqr(t2, t2);
2305
    /* t2 = 78 */
2306
    fe448_mul(t5, t2, t5);
2307
    /* t5 = 7f */
2308
    fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
2309
    /* t2 = f0 */
2310
    fe448_mul(t1, t2, t1);
2311
    /* t1 = ff */
2312
    fe448_mul(t3, t3, t2);
2313
    /* t3 = fe */
2314
    fe448_sqr(t2, t1); for (i = 1; i < 7; ++i) fe448_sqr(t2, t2);
2315
    /* t2 = 7f80 */
2316
    fe448_mul(t5, t2, t5);
2317
    /* t5 = 7fff */
2318
    fe448_sqr(t2, t1); for (i = 1; i < 8; ++i) fe448_sqr(t2, t2);
2319
    /* t2 = ff00 */
2320
    fe448_mul(t1, t2, t1);
2321
    /* t1 = ffff */
2322
    fe448_mul(t3, t3, t2);
2323
    /* t3 = fffe */
2324
    fe448_sqr(t2, t5); for (i = 1; i < 15; ++i) fe448_sqr(t2, t2);
2325
    /* t2 = 3fff8000 */
2326
    fe448_mul(t5, t2, t5);
2327
    /* t5 = 3fffffff */
2328
    fe448_sqr(t2, t1); for (i = 1; i < 16; ++i) fe448_sqr(t2, t2);
2329
    /* t2 = ffff0000 */
2330
    fe448_mul(t1, t2, t1);
2331
    /* t1 = ffffffff */
2332
    fe448_mul(t3, t3, t2);
2333
    /* t3 = fffffffe */
2334
    fe448_sqr(t2, t1); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
2335
    /* t2 = ffffffff00000000 */
2336
    fe448_mul(t2, t2, t1);
2337
    /* t2 = ffffffffffffffff */
2338
    fe448_sqr(t1, t2); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
2339
    /* t1 = ffffffffffffffff0000000000000000 */
2340
    fe448_mul(t1, t1, t2);
2341
    /* t1 = ffffffffffffffffffffffffffffffff */
2342
    fe448_sqr(t1, t1); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
2343
    /* t1 = ffffffffffffffffffffffffffffffff0000000000000000 */
2344
    fe448_mul(t4, t1, t2);
2345
    /* t4 = ffffffffffffffffffffffffffffffffffffffffffffffff */
2346
    fe448_sqr(t2, t4); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
2347
    /* t2 = ffffffffffffffffffffffffffffffffffffffffffffffff00000000 */
2348
    fe448_mul(t3, t3, t2);
2349
    /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
2350
    fe448_sqr(t1, t3); for (i = 1; i < 192; ++i) fe448_sqr(t1, t1);
2351
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe000000000000000000000000000000000000000000000000 */
2352
    fe448_mul(t1, t1, t4);
2353
    /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffff */
2354
    fe448_sqr(t1, t1); for (i = 1; i < 30; ++i) fe448_sqr(t1, t1);
2355
    /* t1 = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffc0000000 */
2356
    fe448_mul(r, t5, t1);
2357
    /* r = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff */
2358
}
2359
2360
/* Constant time, conditional move of b into a.
2361
 * a is not changed if the condition is 0.
2362
 *
2363
 * a  A field element.
2364
 * b  A field element.
2365
 * c  If 1 then copy and if 0 then don't copy.
2366
 */
2367
void fe448_cmov(sword32* a, const sword32* b, int c)
2368
{
2369
    sword32 m = -(sword32)c;
2370
    sword32 t0 = m & (a[0] ^ b[0]);
2371
    sword32 t1 = m & (a[1] ^ b[1]);
2372
    sword32 t2 = m & (a[2] ^ b[2]);
2373
    sword32 t3 = m & (a[3] ^ b[3]);
2374
    sword32 t4 = m & (a[4] ^ b[4]);
2375
    sword32 t5 = m & (a[5] ^ b[5]);
2376
    sword32 t6 = m & (a[6] ^ b[6]);
2377
    sword32 t7 = m & (a[7] ^ b[7]);
2378
    sword32 t8 = m & (a[8] ^ b[8]);
2379
    sword32 t9 = m & (a[9] ^ b[9]);
2380
    sword32 t10 = m & (a[10] ^ b[10]);
2381
    sword32 t11 = m & (a[11] ^ b[11]);
2382
    sword32 t12 = m & (a[12] ^ b[12]);
2383
    sword32 t13 = m & (a[13] ^ b[13]);
2384
    sword32 t14 = m & (a[14] ^ b[14]);
2385
    sword32 t15 = m & (a[15] ^ b[15]);
2386
2387
    a[0] ^= t0;
2388
    a[1] ^= t1;
2389
    a[2] ^= t2;
2390
    a[3] ^= t3;
2391
    a[4] ^= t4;
2392
    a[5] ^= t5;
2393
    a[6] ^= t6;
2394
    a[7] ^= t7;
2395
    a[8] ^= t8;
2396
    a[9] ^= t9;
2397
    a[10] ^= t10;
2398
    a[11] ^= t11;
2399
    a[12] ^= t12;
2400
    a[13] ^= t13;
2401
    a[14] ^= t14;
2402
    a[15] ^= t15;
2403
}
2404
2405
#endif /* HAVE_ED448 */
2406
#endif
2407
2408
#endif /* HAVE_CURVE448 || HAVE_ED448 */