/src/wolfssl-disable-fastmath/wolfcrypt/src/fe_448.c

Line	Count	Source
1		/* fe_448.c
2		*
3		* Copyright (C) 2006-2022 wolfSSL Inc.
4		*
5		* This file is part of wolfSSL.
6		*
7		* wolfSSL is free software; you can redistribute it and/or modify
8		* it under the terms of the GNU General Public License as published by
9		* the Free Software Foundation; either version 2 of the License, or
10		* (at your option) any later version.
11		*
12		* wolfSSL is distributed in the hope that it will be useful,
13		* but WITHOUT ANY WARRANTY; without even the implied warranty of
14		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15		* GNU General Public License for more details.
16		*
17		* You should have received a copy of the GNU General Public License
18		* along with this program; if not, write to the Free Software
19		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20		*/
21
22		/* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work.
23		* Small implementation based on Daniel Beer's curve25519 public domain work.
24		* Reworked for curve448 by Sean Parkinson.
25		*/
26
27		#ifdef HAVE_CONFIG_H
28		#include <config.h>
29		#endif
30
31		#include <wolfssl/wolfcrypt/settings.h>
32
33		#if defined(HAVE_CURVE448) \|\| defined(HAVE_ED448)
34
35		#include <wolfssl/wolfcrypt/fe_448.h>
36
37		#ifdef NO_INLINE
38		#include <wolfssl/wolfcrypt/misc.h>
39		#else
40		#define WOLFSSL_MISC_INCLUDED
41		#include <wolfcrypt/src/misc.c>
42		#endif
43
44		#if defined(CURVE448_SMALL) \|\| defined(ED448_SMALL)
45
46		/* Initialize the field element operations.
47		*/
48		void fe448_init(void)
49		{
50		}
51
52		/* Normalize the field element.
53		* Ensure result is in range: 0..2^448-2^224-2
54		*
55		* a [in] Field element in range 0..2^448-1.
56		*/
57		void fe448_norm(word8* a)
58		{
59		int i;
60		sword16 c = 0;
61		sword16 o = 0;
62
63		for (i = 0; i < 56; i++) {
64		c += a[i];
65		if ((i == 0) \|\| (i == 28))
66		c += 1;
67		c >>= 8;
68		}
69
70		for (i = 0; i < 56; i++) {
71		if ((i == 0) \|\| (i == 28)) o += c;
72		o += a[i];
73		a[i] = (word8)o;
74		o >>= 8;
75		}
76		}
77
78		/* Copy one field element into another: d = a.
79		*
80		* d [in] Destination field element.
81		* a [in] Source field element.
82		*/
83		void fe448_copy(word8* d, const word8* a)
84		{
85		int i;
86		for (i = 0; i < 56; i++) {
87		d[i] = a[i];
88		}
89		}
90
91		/* Conditionally swap the elements.
92		* Constant time implementation.
93		*
94		* a [in] First field element.
95		* b [in] Second field element.
96		* c [in] Swap when 1. Valid values: 0, 1.
97		*/
98		static void fe448_cswap(word8* a, word8* b, int c)
99		{
100		int i;
101		word8 mask = -(word8)c;
102		word8 t[56];
103
104		for (i = 0; i < 56; i++)
105		t[i] = (a[i] ^ b[i]) & mask;
106		for (i = 0; i < 56; i++)
107		a[i] ^= t[i];
108		for (i = 0; i < 56; i++)
109		b[i] ^= t[i];
110		}
111
112		/* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
113		*
114		* r [in] Field element to hold sum.
115		* a [in] Field element to add.
116		* b [in] Field element to add.
117		*/
118		void fe448_add(word8* r, const word8* a, const word8* b)
119		{
120		int i;
121		sword16 c = 0;
122		sword16 o = 0;
123
124		for (i = 0; i < 56; i++) {
125		c += a[i];
126		c += b[i];
127		r[i] = (word8)c;
128		c >>= 8;
129		}
130
131		for (i = 0; i < 56; i++) {
132		if ((i == 0) \|\| (i == 28)) o += c;
133		o += r[i];
134		r[i] = (word8)o;
135		o >>= 8;
136		}
137		}
138
139		/* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
140		*
141		* r [in] Field element to hold difference.
142		* a [in] Field element to subtract from.
143		* b [in] Field element to subtract.
144		*/
145		void fe448_sub(word8* r, const word8* a, const word8* b)
146		{
147		int i;
148		sword16 c = 0;
149		sword16 o = 0;
150
151		for (i = 0; i < 56; i++) {
152		if (i == 28)
153		c += 0x1fc;
154		else
155		c += 0x1fe;
156		c += a[i];
157		c -= b[i];
158		r[i] = (word8)c;
159		c >>= 8;
160		}
161
162		for (i = 0; i < 56; i++) {
163		if ((i == 0) \|\| (i == 28)) o += c;
164		o += r[i];
165		r[i] = (word8)o;
166		o >>= 8;
167		}
168		}
169
170		/* Mulitply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
171		*
172		* r [in] Field element to hold result.
173		* a [in] Field element to multiply.
174		*/
175		void fe448_mul39081(word8* r, const word8* a)
176		{
177		int i;
178		sword32 c = 0;
179		sword32 o = 0;
180
181		for (i = 0; i < 56; i++) {
182		c += a[i] * (sword32)39081;
183		r[i] = (word8)c;
184		c >>= 8;
185		}
186
187		for (i = 0; i < 56; i++) {
188		if ((i == 0) \|\| (i == 28)) o += c;
189		o += r[i];
190		r[i] = (word8)o;
191		o >>= 8;
192		}
193		}
194
195		/* Mulitply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
196		*
197		* r [in] Field element to hold result.
198		* a [in] Field element to multiply.
199		* b [in] Field element to multiply.
200		*/
201		void fe448_mul(word8* r, const word8* a, const word8* b)
202		{
203		int i, k;
204		sword32 c = 0;
205		sword16 o = 0, cc = 0;
206		word8 t[112];
207
208		for (k = 0; k < 56; k++) {
209		i = 0;
210		for (; i <= k; i++) {
211		c += (sword32)a[i] * b[k - i];
212		}
213		t[k] = (word8)c;
214		c >>= 8;
215		}
216		for (; k < 111; k++) {
217		i = k - 55;
218		for (; i < 56; i++) {
219		c += (sword32)a[i] * b[k - i];
220		}
221		t[k] = (word8)c;
222		c >>= 8;
223		}
224		t[k] = (word8)c;
225
226		for (i = 0; i < 28; i++) {
227		o += t[i];
228		o += t[i + 56];
229		o += t[i + 84];
230		r[i] = (word8)o;
231		o >>= 8;
232		}
233		for (i = 28; i < 56; i++) {
234		o += t[i];
235		o += t[i + 56];
236		o += t[i + 28];
237		o += t[i + 56];
238		r[i] = (word8)o;
239		o >>= 8;
240		}
241		for (i = 0; i < 56; i++) {
242		if ((i == 0) \|\| (i == 28)) cc += o;
243		cc += r[i];
244		r[i] = (word8)cc;
245		cc >>= 8;
246		}
247		}
248
249		/* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
250		*
251		* r [in] Field element to hold result.
252		* a [in] Field element to square.
253		*/
254		void fe448_sqr(word8* r, const word8* a)
255		{
256		int i, k;
257		sword32 c = 0;
258		sword32 p;
259		sword16 o = 0, cc = 0;
260		word8 t[112];
261
262		for (k = 0; k < 56; k++) {
263		i = 0;
264		for (; i <= k; i++) {
265		if (k - i < i)
266		break;
267		p = (sword32)a[i] * a[k - i];
268		if (k - i != i)
269		p *= 2;
270		c += p;
271		}
272		t[k] = (word8)c;
273		c >>= 8;
274		}
275		for (; k < 111; k++) {
276		i = k - 55;
277		for (; i < 56; i++) {
278		if (k - i < i)
279		break;
280		p = (sword32)a[i] * a[k - i];
281		if (k - i != i)
282		p *= 2;
283		c += p;
284		}
285		t[k] = (word8)c;
286		c >>= 8;
287		}
288		t[k] = (word8)c;
289
290		for (i = 0; i < 28; i++) {
291		o += t[i];
292		o += t[i + 56];
293		o += t[i + 84];
294		r[i] = (word8)o;
295		o >>= 8;
296		}
297		for (i = 28; i < 56; i++) {
298		o += t[i];
299		o += t[i + 56];
300		o += t[i + 28];
301		o += t[i + 56];
302		r[i] = (word8)o;
303		o >>= 8;
304		}
305		for (i = 0; i < 56; i++) {
306		if ((i == 0) \|\| (i == 28)) cc += o;
307		cc += r[i];
308		r[i] = (word8)cc;
309		cc >>= 8;
310		}
311		fe448_norm(r);
312		}
313
314		/* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
315		* Constant time implementation - using Fermat's little theorem:
316		* a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
317		* For curve448: p - 2 = 2^448 - 2^224 - 3
318		*
319		* r [in] Field element to hold result.
320		* a [in] Field element to invert.
321		*/
322		void fe448_invert(word8* r, const word8* a)
323		{
324		int i;
325		word8 t[56];
326
327		fe448_sqr(t, a);
328		fe448_mul(t, t, a);
329		for (i = 0; i < 221; i++) {
330		fe448_sqr(t, t);
331		fe448_mul(t, t, a);
332		}
333		fe448_sqr(t, t);
334		for (i = 0; i < 222; i++) {
335		fe448_sqr(t, t);
336		fe448_mul(t, t, a);
337		}
338		fe448_sqr(t, t);
339		fe448_sqr(t, t);
340		fe448_mul(r, t, a);
341		}
342
343		/* Scalar multiply the point by a number. r = n.a
344		* Uses Montgomery ladder and only requires the x-ordinate.
345		*
346		* r [in] Field element to hold result.
347		* n [in] Scalar as an array of bytes.
348		* a [in] Point to multiply - x-ordinate only.
349		*/
350		int curve448(byte* r, const byte* n, const byte* a)
351		{
352		word8 x1[56];
353		word8 x2[56] = {1};
354		word8 z2[56] = {0};
355		word8 x3[56];
356		word8 z3[56] = {1};
357		word8 t0[56];
358		word8 t1[56];
359		int i;
360		unsigned int swap;
361		unsigned int b;
362
363		fe448_copy(x1, a);
364		fe448_copy(x3, a);
365
366		swap = 0;
367		for (i = 447; i >= 0; --i) {
368		b = (n[i >> 3] >> (i & 7)) & 1;
369		swap ^= b;
370		fe448_cswap(x2, x3, swap);
371		fe448_cswap(z2, z3, swap);
372		swap = b;
373
374		/* Montgomery Ladder - double and add */
375		fe448_add(t0, x2, z2);
376		fe448_add(t1, x3, z3);
377		fe448_sub(x2, x2, z2);
378		fe448_sub(x3, x3, z3);
379		fe448_mul(t1, t1, x2);
380		fe448_mul(z3, x3, t0);
381		fe448_sqr(t0, t0);
382		fe448_sqr(x2, x2);
383		fe448_add(x3, z3, t1);
384		fe448_sqr(x3, x3);
385		fe448_sub(z3, z3, t1);
386		fe448_sqr(z3, z3);
387		fe448_mul(z3, z3, x1);
388		fe448_sub(t1, t0, x2);
389		fe448_mul(x2, t0, x2);
390		fe448_mul39081(z2, t1);
391		fe448_add(z2, t0, z2);
392		fe448_mul(z2, z2, t1);
393		}
394		fe448_cswap(x2, x3, swap);
395		fe448_cswap(z2, z3, swap);
396
397		fe448_invert(z2, z2);
398		fe448_mul(r, x2, z2);
399		fe448_norm(r);
400
401		return 0;
402		}
403
404		#ifdef HAVE_ED448
405		/* Check whether field element is not 0.
406		* Field element must have been normalized before call.
407		*
408		* a [in] Field element.
409		* returns 0 when zero, and any other value otherwise.
410		*/
411		int fe448_isnonzero(const word8* a)
412		{
413		int i;
414		byte c = 0;
415		for (i = 0; i < 56; i++)
416		c \|= a[i];
417		return c;
418		}
419
420		/* Negates the field element. r = -a mod (2^448 - 2^224 - 1)
421		* Add 0x200 to each element and subtract 2 from next.
422		* Top element overflow handled by subtracting 2 from index 0 and 28.
423		*
424		* r [in] Field element to hold result.
425		* a [in] Field element.
426		*/
427		void fe448_neg(word8* r, const word8* a)
428		{
429		int i;
430		sword16 c = 0;
431		sword16 o = 0;
432
433		for (i = 0; i < 56; i++) {
434		if (i == 28)
435		c += 0x1fc;
436		else
437		c += 0x1fe;
438		c -= a[i];
439		r[i] = (word8)c;
440		c >>= 8;
441		}
442
443		for (i = 0; i < 56; i++) {
444		if ((i == 0) \|\| (i == 28)) o += c;
445		o += r[i];
446		r[i] = (word8)o;
447		o >>= 8;
448		}
449		}
450
451		/* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
452		* Used for calcualting y-ordinate from x-ordinate for Ed448.
453		*
454		* r [in] Field element to hold result.
455		* a [in] Field element to exponentiate.
456		*/
457		void fe448_pow_2_446_222_1(word8* r, const word8* a)
458		{
459		int i;
460		word8 t[56];
461
462		fe448_sqr(t, a);
463		fe448_mul(t, t, a);
464		for (i = 0; i < 221; i++) {
465		fe448_sqr(t, t);
466		fe448_mul(t, t, a);
467		}
468		fe448_sqr(t, t);
469		for (i = 0; i < 221; i++) {
470		fe448_sqr(t, t);
471		fe448_mul(t, t, a);
472		}
473		fe448_sqr(t, t);
474		fe448_mul(r, t, a);
475		}
476
477		/* Constant time, conditional move of b into a.
478		* a is not changed if the condition is 0.
479		*
480		* a A field element.
481		* b A field element.
482		* c If 1 then copy and if 0 then don't copy.
483		*/
484		void fe448_cmov(word8* a, const word8* b, int c)
485		{
486		int i;
487		word8 m = -(word8)c;
488		word8 t[56];
489
490		for (i = 0; i < 56; i++)
491		t[i] = m & (a[i] ^ b[i]);
492		for (i = 0; i < 56; i++)
493		a[i] ^= t[i];
494		}
495
496		#endif /* HAVE_ED448 */
497		#elif defined(CURVED448_128BIT)
498
499		/* Initialize the field element operations.
500		*/
501		void fe448_init(void)
502	1.44k	{
503	1.44k	}
504
505		/* Convert the field element from a byte array to an array of 56-bits.
506		*
507		* r [in] Array to encode into.
508		* b [in] Byte array.
509		*/
510		void fe448_from_bytes(sword64* r, const unsigned char* b)
511	638	{
512	638	r[ 0] = ((sword64) (b[ 0]) << 0)
513	638	\| ((sword64) (b[ 1]) << 8)
514	638	\| ((sword64) (b[ 2]) << 16)
515	638	\| ((sword64) (b[ 3]) << 24)
516	638	\| ((sword64) (b[ 4]) << 32)
517	638	\| ((sword64) (b[ 5]) << 40)
518	638	\| ((sword64) (b[ 6]) << 48);
519	638	r[ 1] = ((sword64) (b[ 7]) << 0)
520	638	\| ((sword64) (b[ 8]) << 8)
521	638	\| ((sword64) (b[ 9]) << 16)
522	638	\| ((sword64) (b[10]) << 24)
523	638	\| ((sword64) (b[11]) << 32)
524	638	\| ((sword64) (b[12]) << 40)
525	638	\| ((sword64) (b[13]) << 48);
526	638	r[ 2] = ((sword64) (b[14]) << 0)
527	638	\| ((sword64) (b[15]) << 8)
528	638	\| ((sword64) (b[16]) << 16)
529	638	\| ((sword64) (b[17]) << 24)
530	638	\| ((sword64) (b[18]) << 32)
531	638	\| ((sword64) (b[19]) << 40)
532	638	\| ((sword64) (b[20]) << 48);
533	638	r[ 3] = ((sword64) (b[21]) << 0)
534	638	\| ((sword64) (b[22]) << 8)
535	638	\| ((sword64) (b[23]) << 16)
536	638	\| ((sword64) (b[24]) << 24)
537	638	\| ((sword64) (b[25]) << 32)
538	638	\| ((sword64) (b[26]) << 40)
539	638	\| ((sword64) (b[27]) << 48);
540	638	r[ 4] = ((sword64) (b[28]) << 0)
541	638	\| ((sword64) (b[29]) << 8)
542	638	\| ((sword64) (b[30]) << 16)
543	638	\| ((sword64) (b[31]) << 24)
544	638	\| ((sword64) (b[32]) << 32)
545	638	\| ((sword64) (b[33]) << 40)
546	638	\| ((sword64) (b[34]) << 48);
547	638	r[ 5] = ((sword64) (b[35]) << 0)
548	638	\| ((sword64) (b[36]) << 8)
549	638	\| ((sword64) (b[37]) << 16)
550	638	\| ((sword64) (b[38]) << 24)
551	638	\| ((sword64) (b[39]) << 32)
552	638	\| ((sword64) (b[40]) << 40)
553	638	\| ((sword64) (b[41]) << 48);
554	638	r[ 6] = ((sword64) (b[42]) << 0)
555	638	\| ((sword64) (b[43]) << 8)
556	638	\| ((sword64) (b[44]) << 16)
557	638	\| ((sword64) (b[45]) << 24)
558	638	\| ((sword64) (b[46]) << 32)
559	638	\| ((sword64) (b[47]) << 40)
560	638	\| ((sword64) (b[48]) << 48);
561	638	r[ 7] = ((sword64) (b[49]) << 0)
562	638	\| ((sword64) (b[50]) << 8)
563	638	\| ((sword64) (b[51]) << 16)
564	638	\| ((sword64) (b[52]) << 24)
565	638	\| ((sword64) (b[53]) << 32)
566	638	\| ((sword64) (b[54]) << 40)
567	638	\| ((sword64) (b[55]) << 48);
568	638	}
569
570		/* Convert the field element to a byte array from an array of 56-bits.
571		*
572		* b [in] Byte array.
573		* a [in] Array to encode into.
574		*/
575		void fe448_to_bytes(unsigned char* b, const sword64* a)
576	4.82k	{
577	4.82k	sword128 t;
578		/* Mod */
579	4.82k	sword64 in0 = a[0];
580	4.82k	sword64 in1 = a[1];
581	4.82k	sword64 in2 = a[2];
582	4.82k	sword64 in3 = a[3];
583	4.82k	sword64 in4 = a[4];
584	4.82k	sword64 in5 = a[5];
585	4.82k	sword64 in6 = a[6];
586	4.82k	sword64 in7 = a[7];
587	4.82k	sword64 o = in7 >> 56;
588	4.82k	in7 -= o << 56;
589	4.82k	in0 += o;
590	4.82k	in4 += o;
591	4.82k	o = (in0 + 1) >> 56;
592	4.82k	o = (o + in1) >> 56;
593	4.82k	o = (o + in2) >> 56;
594	4.82k	o = (o + in3) >> 56;
595	4.82k	o = (o + in4 + 1) >> 56;
596	4.82k	o = (o + in5) >> 56;
597	4.82k	o = (o + in6) >> 56;
598	4.82k	o = (o + in7) >> 56;
599	4.82k	in0 += o;
600	4.82k	in4 += o;
601	4.82k	in7 -= o << 56;
602	4.82k	o = (in0 >> 56); in1 += o; t = o << 56; in0 -= (sword64)t;
603	4.82k	o = (in1 >> 56); in2 += o; t = o << 56; in1 -= (sword64)t;
604	4.82k	o = (in2 >> 56); in3 += o; t = o << 56; in2 -= (sword64)t;
605	4.82k	o = (in3 >> 56); in4 += o; t = o << 56; in3 -= (sword64)t;
606	4.82k	o = (in4 >> 56); in5 += o; t = o << 56; in4 -= (sword64)t;
607	4.82k	o = (in5 >> 56); in6 += o; t = o << 56; in5 -= (sword64)t;
608	4.82k	o = (in6 >> 56); in7 += o; t = o << 56; in6 -= (sword64)t;
609	4.82k	o = (in7 >> 56); in0 += o;
610	4.82k	in4 += o; t = o << 56; in7 -= (sword64)t;
611
612		/* Output as bytes */
613	4.82k	b[ 0] = (in0 >> 0);
614	4.82k	b[ 1] = (in0 >> 8);
615	4.82k	b[ 2] = (in0 >> 16);
616	4.82k	b[ 3] = (in0 >> 24);
617	4.82k	b[ 4] = (in0 >> 32);
618	4.82k	b[ 5] = (in0 >> 40);
619	4.82k	b[ 6] = (in0 >> 48);
620	4.82k	b[ 7] = (in1 >> 0);
621	4.82k	b[ 8] = (in1 >> 8);
622	4.82k	b[ 9] = (in1 >> 16);
623	4.82k	b[10] = (in1 >> 24);
624	4.82k	b[11] = (in1 >> 32);
625	4.82k	b[12] = (in1 >> 40);
626	4.82k	b[13] = (in1 >> 48);
627	4.82k	b[14] = (in2 >> 0);
628	4.82k	b[15] = (in2 >> 8);
629	4.82k	b[16] = (in2 >> 16);
630	4.82k	b[17] = (in2 >> 24);
631	4.82k	b[18] = (in2 >> 32);
632	4.82k	b[19] = (in2 >> 40);
633	4.82k	b[20] = (in2 >> 48);
634	4.82k	b[21] = (in3 >> 0);
635	4.82k	b[22] = (in3 >> 8);
636	4.82k	b[23] = (in3 >> 16);
637	4.82k	b[24] = (in3 >> 24);
638	4.82k	b[25] = (in3 >> 32);
639	4.82k	b[26] = (in3 >> 40);
640	4.82k	b[27] = (in3 >> 48);
641	4.82k	b[28] = (in4 >> 0);
642	4.82k	b[29] = (in4 >> 8);
643	4.82k	b[30] = (in4 >> 16);
644	4.82k	b[31] = (in4 >> 24);
645	4.82k	b[32] = (in4 >> 32);
646	4.82k	b[33] = (in4 >> 40);
647	4.82k	b[34] = (in4 >> 48);
648	4.82k	b[35] = (in5 >> 0);
649	4.82k	b[36] = (in5 >> 8);
650	4.82k	b[37] = (in5 >> 16);
651	4.82k	b[38] = (in5 >> 24);
652	4.82k	b[39] = (in5 >> 32);
653	4.82k	b[40] = (in5 >> 40);
654	4.82k	b[41] = (in5 >> 48);
655	4.82k	b[42] = (in6 >> 0);
656	4.82k	b[43] = (in6 >> 8);
657	4.82k	b[44] = (in6 >> 16);
658	4.82k	b[45] = (in6 >> 24);
659	4.82k	b[46] = (in6 >> 32);
660	4.82k	b[47] = (in6 >> 40);
661	4.82k	b[48] = (in6 >> 48);
662	4.82k	b[49] = (in7 >> 0);
663	4.82k	b[50] = (in7 >> 8);
664	4.82k	b[51] = (in7 >> 16);
665	4.82k	b[52] = (in7 >> 24);
666	4.82k	b[53] = (in7 >> 32);
667	4.82k	b[54] = (in7 >> 40);
668	4.82k	b[55] = (in7 >> 48);
669	4.82k	}
670
671		/* Set the field element to 0.
672		*
673		* a [in] Field element.
674		*/
675		void fe448_1(sword64* a)
676	175k	{
677	175k	a[0] = 1;
678	175k	a[1] = 0;
679	175k	a[2] = 0;
680	175k	a[3] = 0;
681	175k	a[4] = 0;
682	175k	a[5] = 0;
683	175k	a[6] = 0;
684	175k	a[7] = 0;
685	175k	}
686
687		/* Set the field element to 0.
688		*
689		* a [in] Field element.
690		*/
691		void fe448_0(sword64* a)
692	172k	{
693	172k	a[0] = 0;
694	172k	a[1] = 0;
695	172k	a[2] = 0;
696	172k	a[3] = 0;
697	172k	a[4] = 0;
698	172k	a[5] = 0;
699	172k	a[6] = 0;
700	172k	a[7] = 0;
701	172k	}
702
703		/* Copy one field element into another: d = a.
704		*
705		* d [in] Destination field element.
706		* a [in] Source field element.
707		*/
708		void fe448_copy(sword64* d, const sword64* a)
709	4.30k	{
710	4.30k	d[0] = a[0];
711	4.30k	d[1] = a[1];
712	4.30k	d[2] = a[2];
713	4.30k	d[3] = a[3];
714	4.30k	d[4] = a[4];
715	4.30k	d[5] = a[5];
716	4.30k	d[6] = a[6];
717	4.30k	d[7] = a[7];
718	4.30k	}
719
720		/* Conditionally swap the elements.
721		* Constant time implementation.
722		*
723		* a [in] First field element.
724		* b [in] Second field element.
725		* c [in] Swap when 1. Valid values: 0, 1.
726		*/
727		static void fe448_cswap(sword64* a, sword64* b, int c)
728	181k	{
729	181k	sword64 mask = -(sword64)c;
730	181k	sword64 t0 = (a[0] ^ b[0]) & mask;
731	181k	sword64 t1 = (a[1] ^ b[1]) & mask;
732	181k	sword64 t2 = (a[2] ^ b[2]) & mask;
733	181k	sword64 t3 = (a[3] ^ b[3]) & mask;
734	181k	sword64 t4 = (a[4] ^ b[4]) & mask;
735	181k	sword64 t5 = (a[5] ^ b[5]) & mask;
736	181k	sword64 t6 = (a[6] ^ b[6]) & mask;
737	181k	sword64 t7 = (a[7] ^ b[7]) & mask;
738	181k	a[0] ^= t0;
739	181k	a[1] ^= t1;
740	181k	a[2] ^= t2;
741	181k	a[3] ^= t3;
742	181k	a[4] ^= t4;
743	181k	a[5] ^= t5;
744	181k	a[6] ^= t6;
745	181k	a[7] ^= t7;
746	181k	b[0] ^= t0;
747	181k	b[1] ^= t1;
748	181k	b[2] ^= t2;
749	181k	b[3] ^= t3;
750	181k	b[4] ^= t4;
751	181k	b[5] ^= t5;
752	181k	b[6] ^= t6;
753	181k	b[7] ^= t7;
754	181k	}
755
756		/* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
757		*
758		* r [in] Field element to hold sum.
759		* a [in] Field element to add.
760		* b [in] Field element to add.
761		*/
762		void fe448_add(sword64* r, const sword64* a, const sword64* b)
763	1.52M	{
764	1.52M	r[0] = a[0] + b[0];
765	1.52M	r[1] = a[1] + b[1];
766	1.52M	r[2] = a[2] + b[2];
767	1.52M	r[3] = a[3] + b[3];
768	1.52M	r[4] = a[4] + b[4];
769	1.52M	r[5] = a[5] + b[5];
770	1.52M	r[6] = a[6] + b[6];
771	1.52M	r[7] = a[7] + b[7];
772	1.52M	}
773
774		/* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
775		*
776		* r [in] Field element to hold difference.
777		* a [in] Field element to subtract from.
778		* b [in] Field element to subtract.
779		*/
780		void fe448_sub(sword64* r, const sword64* a, const sword64* b)
781	1.70M	{
782	1.70M	r[0] = a[0] - b[0];
783	1.70M	r[1] = a[1] - b[1];
784	1.70M	r[2] = a[2] - b[2];
785	1.70M	r[3] = a[3] - b[3];
786	1.70M	r[4] = a[4] - b[4];
787	1.70M	r[5] = a[5] - b[5];
788	1.70M	r[6] = a[6] - b[6];
789	1.70M	r[7] = a[7] - b[7];
790	1.70M	}
791
792		/* Mulitply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
793		*
794		* r [in] Field element to hold result.
795		* a [in] Field element to multiply.
796		*/
797		void fe448_mul39081(sword64* r, const sword64* a)
798	309k	{
799	309k	sword128 t;
800	309k	sword64 o;
801	309k	sword128 t0 = a[0] * (sword128)39081;
802	309k	sword128 t1 = a[1] * (sword128)39081;
803	309k	sword128 t2 = a[2] * (sword128)39081;
804	309k	sword128 t3 = a[3] * (sword128)39081;
805	309k	sword128 t4 = a[4] * (sword128)39081;
806	309k	sword128 t5 = a[5] * (sword128)39081;
807	309k	sword128 t6 = a[6] * (sword128)39081;
808	309k	sword128 t7 = a[7] * (sword128)39081;
809	309k	o = (sword64)(t0 >> 56); t1 += o; t = (sword128)o << 56; t0 -= t;
810	309k	o = (sword64)(t1 >> 56); t2 += o; t = (sword128)o << 56; t1 -= t;
811	309k	o = (sword64)(t2 >> 56); t3 += o; t = (sword128)o << 56; t2 -= t;
812	309k	o = (sword64)(t3 >> 56); t4 += o; t = (sword128)o << 56; t3 -= t;
813	309k	o = (sword64)(t4 >> 56); t5 += o; t = (sword128)o << 56; t4 -= t;
814	309k	o = (sword64)(t5 >> 56); t6 += o; t = (sword128)o << 56; t5 -= t;
815	309k	o = (sword64)(t6 >> 56); t7 += o; t = (sword128)o << 56; t6 -= t;
816	309k	o = (sword64)(t7 >> 56); t0 += o;
817	309k	t4 += o; t = (sword128)o << 56; t7 -= t;
818
819		/* Store */
820	309k	r[0] = (sword64)t0;
821	309k	r[1] = (sword64)t1;
822	309k	r[2] = (sword64)t2;
823	309k	r[3] = (sword64)t3;
824	309k	r[4] = (sword64)t4;
825	309k	r[5] = (sword64)t5;
826	309k	r[6] = (sword64)t6;
827	309k	r[7] = (sword64)t7;
828	309k	}
829
830		/* Mulitply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
831		*
832		* r [in] Field element to hold result.
833		* a [in] Field element to multiply.
834		* b [in] Field element to multiply.
835		*/
836		void fe448_mul(sword64* r, const sword64* a, const sword64* b)
837	2.98M	{
838	2.98M	sword64 o;
839	2.98M	sword64 a1[4];
840	2.98M	sword64 b1[4];
841	2.98M	sword128 t0;
842	2.98M	sword128 t1;
843	2.98M	sword128 t2;
844	2.98M	sword128 t3;
845	2.98M	sword128 t4;
846	2.98M	sword128 t5;
847	2.98M	sword128 t6;
848	2.98M	sword128 t7;
849	2.98M	sword128 t03;
850	2.98M	sword128 t04;
851	2.98M	sword128 t13;
852	2.98M	sword128 t14;
853
854	2.98M	a1[0] = a[0] + a[4];
855	2.98M	a1[1] = a[1] + a[5];
856	2.98M	a1[2] = a[2] + a[6];
857	2.98M	a1[3] = a[3] + a[7];
858	2.98M	b1[0] = b[0] + b[4];
859	2.98M	b1[1] = b[1] + b[5];
860	2.98M	b1[2] = b[2] + b[6];
861	2.98M	b1[3] = b[3] + b[7];
862
863	2.98M	t03 = ((sword128)a[0] * b[3]) + ((sword128)a[1] * b[2])
864	2.98M	+ ((sword128)a[2] * b[1]) + ((sword128)a[3] * b[0]);
865	2.98M	t04 = ((sword128)a[1] * b[3]) + ((sword128)a[2] * b[2])
866	2.98M	+ ((sword128)a[3] * b[1]);
867	2.98M	t04 += t03 >> 56;
868	2.98M	t03 &= 0xffffffffffffffL;
869	2.98M	t13 = ((sword128)a1[0] * b1[3]) + ((sword128)a1[1] * b1[2])
870	2.98M	+ ((sword128)a1[2] * b1[1]) + ((sword128)a1[3] * b1[0]);
871	2.98M	t14 = ((sword128)a1[1] * b1[3]) + ((sword128)a1[2] * b1[2])
872	2.98M	+ ((sword128)a1[3] * b1[1]);
873	2.98M	t14 += t13 >> 56;
874	2.98M	t13 &= 0xffffffffffffffL;
875
876	2.98M	t0 = ((sword128)a[0] * b[0]) + ((sword128)a[4] * b[4]) + t14 + -t04;
877	2.98M	t1 = ((sword128)a[0] * b[1]) + ((sword128)a[1] * b[0])
878	2.98M	+ ((sword128)a[4] * b[5]) + ((sword128)a[5] * b[4])
879	2.98M	+ ((sword128)a1[2] * b1[3]) + ((sword128)a1[3] * b1[2])
880	2.98M	- ((sword128)a[2] * b[3]) - ((sword128)a[3] * b[2]);
881	2.98M	o = (sword64)(t0 >> 56); t1 += o; t0 &= 0xffffffffffffffL;
882	2.98M	t2 = ((sword128)a[0] * b[2]) + ((sword128)a[1] * b[1])
883	2.98M	+ ((sword128)a[2] * b[0]) + ((sword128)a[4] * b[6])
884	2.98M	+ ((sword128)a[5] * b[5]) + ((sword128)a[6] * b[4])
885	2.98M	+ ((sword128)a1[3] * b1[3]) - ((sword128)a[3] * b[3]);
886	2.98M	o = (sword64)(t1 >> 56); t2 += o; t1 &= 0xffffffffffffffL;
887	2.98M	t3 = t03 + ((sword128)a[4] * b[7]) + ((sword128)a[5] * b[6])
888	2.98M	+ ((sword128)a[6] * b[5]) + ((sword128)a[7] * b[4]);
889	2.98M	o = (sword64)(t2 >> 56); t3 += o; t2 &= 0xffffffffffffffL;
890	2.98M	t4 = ((sword128)a[5] * b[7]) + ((sword128)a[6] * b[6])
891	2.98M	+ ((sword128)a[7] * b[5]) + ((sword128)a1[0] * b1[0])
892	2.98M	- ((sword128)a[0] * b[0]) + t14;
893	2.98M	o = (sword64)(t3 >> 56); t4 += o; t3 &= 0xffffffffffffffL;
894	2.98M	t5 = ((sword128)a[6] * b[7]) + ((sword128)a[7] * b[6])
895	2.98M	+ ((sword128)a1[0] * b1[1]) + ((sword128)a1[1] * b1[0])
896	2.98M	- ((sword128)a[0] * b[1]) - ((sword128)a[1] * b[0])
897	2.98M	+ ((sword128)a1[2] * b1[3]) + ((sword128)a1[3] * b1[2]);
898	2.98M	o = (sword64)(t4 >> 56); t5 += o; t4 &= 0xffffffffffffffL;
899	2.98M	t6 = ((sword128)a[7] * b[7]) + ((sword128)a1[0] * b1[2])
900	2.98M	+ ((sword128)a1[1] * b1[1]) + ((sword128)a1[2] * b1[0])
901	2.98M	- ((sword128)a[0] * b[2]) - ((sword128)a[1] * b[1])
902	2.98M	- ((sword128)a[2] * b[0]) + ((sword128)a1[3] * b1[3]);
903	2.98M	o = (sword64)(t5 >> 56); t6 += o; t5 &= 0xffffffffffffffL;
904	2.98M	t7 = t13 + -t03;
905	2.98M	o = (sword64)(t6 >> 56); t7 += o; t6 &= 0xffffffffffffffL;
906	2.98M	o = (sword64)(t7 >> 56); t0 += o;
907	2.98M	t4 += o; t7 &= 0xffffffffffffffL;
908
909		/* Store */
910	2.98M	r[0] = (sword64)t0;
911	2.98M	r[1] = (sword64)t1;
912	2.98M	r[2] = (sword64)t2;
913	2.98M	r[3] = (sword64)t3;
914	2.98M	r[4] = (sword64)t4;
915	2.98M	r[5] = (sword64)t5;
916	2.98M	r[6] = (sword64)t6;
917	2.98M	r[7] = (sword64)t7;
918	2.98M	}
919
920		/* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
921		*
922		* r [in] Field element to hold result.
923		* a [in] Field element to square.
924		*/
925		void fe448_sqr(sword64* r, const sword64* a)
926	2.36M	{
927	2.36M	sword64 o;
928	2.36M	sword64 a1[4];
929	2.36M	sword128 t0;
930	2.36M	sword128 t1;
931	2.36M	sword128 t2;
932	2.36M	sword128 t3;
933	2.36M	sword128 t4;
934	2.36M	sword128 t5;
935	2.36M	sword128 t6;
936	2.36M	sword128 t7;
937	2.36M	sword128 t03;
938	2.36M	sword128 t04;
939	2.36M	sword128 t13;
940	2.36M	sword128 t14;
941
942	2.36M	a1[0] = a[0] + a[4];
943	2.36M	a1[1] = a[1] + a[5];
944	2.36M	a1[2] = a[2] + a[6];
945	2.36M	a1[3] = a[3] + a[7];
946
947	2.36M	t03 = ((sword128)a[0] * (2 * a[3])) + ((sword128)a[1] * (2 * a[2]));
948	2.36M	t04 = ((sword128)a[1] * (2 * a[3])) + ((sword128)a[2] * a[2]);
949	2.36M	t04 += t03 >> 56;
950	2.36M	t03 &= 0xffffffffffffffL;
951	2.36M	t13 = ((sword128)a1[0] * (2 * a1[3])) + ((sword128)a1[1] * (2 * a1[2]));
952	2.36M	t14 = ((sword128)a1[1] * (2 * a1[3])) + ((sword128)a1[2] * a1[2]);
953	2.36M	t14 += t13 >> 56;
954	2.36M	t13 &= 0xffffffffffffffL;
955
956	2.36M	t0 = ((sword128)a[0] * a[0]) + ((sword128)a[4] * a[4]) + t14 + -t04;
957	2.36M	t1 = ((sword128)a[0] * (2 * a[1])) + ((sword128)a[4] * (2 * a[5]))
958	2.36M	+ ((sword128)a1[2] * (2 * a1[3])) - ((sword128)a[2] * (2 * a[3]));
959	2.36M	o = (sword64)(t0 >> 56); t1 += o; t0 &= 0xffffffffffffffL;
960	2.36M	t2 = ((sword128)a[0] * (2 * a[2])) + ((sword128)a[1] * a[1])
961	2.36M	+ ((sword128)a[4] * (2 * a[6])) + ((sword128)a[5] * a[5])
962	2.36M	+ ((sword128)a1[3] * a1[3]) - ((sword128)a[3] * a[3]);
963	2.36M	o = (sword64)(t1 >> 56); t2 += o; t1 &= 0xffffffffffffffL;
964	2.36M	t3 = t03 + ((sword128)a[4] * (2 * a[7])) + ((sword128)a[5] * (2 * a[6]));
965	2.36M	o = (sword64)(t2 >> 56); t3 += o; t2 &= 0xffffffffffffffL;
966	2.36M	t4 = ((sword128)a[5] * (2 * a[7])) + ((sword128)a[6] * a[6])
967	2.36M	+ ((sword128)a1[0] * a1[0]) - ((sword128)a[0] * a[0]) + t14;
968	2.36M	o = (sword64)(t3 >> 56); t4 += o; t3 &= 0xffffffffffffffL;
969	2.36M	t5 = ((sword128)a[6] * (2 * a[7])) + ((sword128)a1[0] * (2 * a1[1]))
970	2.36M	- ((sword128)a[0] * (2 * a[1])) + ((sword128)a1[2] * (2 * a1[3]));
971	2.36M	o = (sword64)(t4 >> 56); t5 += o; t4 &= 0xffffffffffffffL;
972	2.36M	t6 = ((sword128)a[7] * a[7]) + ((sword128)a1[0] * (2 * a1[2]))
973	2.36M	+ ((sword128)a1[1] * a1[1]) - ((sword128)a[0] * (2 * a[2]))
974	2.36M	- ((sword128)a[1] * a[1]) + ((sword128)a1[3] * a1[3]);
975	2.36M	o = (sword64)(t5 >> 56); t6 += o; t5 &= 0xffffffffffffffL;
976	2.36M	t7 = t13 + -t03;
977	2.36M	o = (sword64)(t6 >> 56); t7 += o; t6 &= 0xffffffffffffffL;
978	2.36M	o = (sword64)(t7 >> 56); t0 += o;
979	2.36M	t4 += o; t7 &= 0xffffffffffffffL;
980
981		/* Store */
982	2.36M	r[0] = (sword64)t0;
983	2.36M	r[1] = (sword64)t1;
984	2.36M	r[2] = (sword64)t2;
985	2.36M	r[3] = (sword64)t3;
986	2.36M	r[4] = (sword64)t4;
987	2.36M	r[5] = (sword64)t5;
988	2.36M	r[6] = (sword64)t6;
989	2.36M	r[7] = (sword64)t7;
990	2.36M	}
991
992		/* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
993		* Constant time implementation - using Fermat's little theorem:
994		* a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
995		* For curve448: p - 2 = 2^448 - 2^224 - 3
996		*
997		* r [in] Field element to hold result.
998		* a [in] Field element to invert.
999		*/
1000		void fe448_invert(sword64* r, const sword64* a)
1001	2.07k	{
1002	2.07k	sword64 t1[8];
1003	2.07k	sword64 t2[8];
1004	2.07k	sword64 t3[8];
1005	2.07k	sword64 t4[8];
1006	2.07k	int i;
1007
1008	2.07k	fe448_sqr(t1, a);
1009		/* t1 = 2 */
1010	2.07k	fe448_mul(t1, t1, a);
1011		/* t1 = 3 */
1012	4.15k	fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
1013		/* t2 = c */
1014	2.07k	fe448_mul(t3, t2, a);
1015		/* t3 = d */
1016	2.07k	fe448_mul(t1, t2, t1);
1017		/* t1 = f */
1018	2.07k	fe448_sqr(t2, t1);
1019		/* t2 = 1e */
1020	2.07k	fe448_mul(t4, t2, a);
1021		/* t4 = 1f */
1022	10.3k	fe448_sqr(t2, t4); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
1023		/* t2 = 3e0 */
1024	2.07k	fe448_mul(t1, t2, t4);
1025		/* t1 = 3ff */
1026	20.7k	fe448_sqr(t2, t1); for (i = 1; i < 10; ++i) fe448_sqr(t2, t2);
1027		/* t2 = ffc00 */
1028	2.07k	fe448_mul(t1, t2, t1);
1029		/* t1 = fffff */
1030	10.3k	fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
1031		/* t2 = 1ffffe0 */
1032	2.07k	fe448_mul(t1, t2, t4);
1033		/* t1 = 1ffffff */
1034	51.9k	fe448_sqr(t2, t1); for (i = 1; i < 25; ++i) fe448_sqr(t2, t2);
1035		/* t2 = 3fffffe000000 */
1036	2.07k	fe448_mul(t1, t2, t1);
1037		/* t1 = 3ffffffffffff */
1038	10.3k	fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
1039		/* t2 = 7fffffffffffe0 */
1040	2.07k	fe448_mul(t1, t2, t4);
1041		/* t1 = 7fffffffffffff */
1042	114k	fe448_sqr(t2, t1); for (i = 1; i < 55; ++i) fe448_sqr(t2, t2);
1043		/* t2 = 3fffffffffffff80000000000000 */
1044	2.07k	fe448_mul(t1, t2, t1);
1045		/* t1 = 3fffffffffffffffffffffffffff */
1046	228k	fe448_sqr(t2, t1); for (i = 1; i < 110; ++i) fe448_sqr(t2, t2);
1047		/* t2 = fffffffffffffffffffffffffffc000000000000000000000000000 */
1048	2.07k	fe448_mul(t1, t2, t1);
1049		/* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff */
1050	8.31k	fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
1051		/* t2 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff0 */
1052	2.07k	fe448_mul(t3, t3, t2);
1053		/* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
1054	2.07k	fe448_mul(t1, t3, a);
1055		/* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
1056	465k	fe448_sqr(t1, t1); for (i = 1; i < 224; ++i) fe448_sqr(t1, t1);
1057		/* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000000000000000000000000000000000 */
1058	2.07k	fe448_mul(r, t3, t1);
1059		/* r = fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
1060	2.07k	}
1061
1062		/* Scalar multiply the point by a number. r = n.a
1063		* Uses Montgomery ladder and only requires the x-ordinate.
1064		*
1065		* r [in] Field element to hold result.
1066		* n [in] Scalar as an array of bytes.
1067		* a [in] Point to multiply - x-ordinate only.
1068		*/
1069		int curve448(byte* r, const byte* n, const byte* a)
1070	203	{
1071	203	sword64 x1[8];
1072	203	sword64 x2[8];
1073	203	sword64 z2[8];
1074	203	sword64 x3[8];
1075	203	sword64 z3[8];
1076	203	sword64 t0[8];
1077	203	sword64 t1[8];
1078	203	int i;
1079	203	unsigned int swap;
1080	203	unsigned int b;
1081
1082	203	fe448_from_bytes(x1, a);
1083	203	fe448_1(x2);
1084	203	fe448_0(z2);
1085	203	fe448_copy(x3, x1);
1086	203	fe448_1(z3);
1087
1088	203	swap = 0;
1089	91.1k	for (i = 447; i >= 0; --i) {
1090	90.9k	b = (n[i >> 3] >> (i & 7)) & 1;
1091	90.9k	swap ^= b;
1092	90.9k	fe448_cswap(x2, x3, swap);
1093	90.9k	fe448_cswap(z2, z3, swap);
1094	90.9k	swap = b;
1095
1096		/* Montgomery Ladder - double and add */
1097	90.9k	fe448_add(t0, x2, z2);
1098	90.9k	fe448_reduce(t0);
1099	90.9k	fe448_add(t1, x3, z3);
1100	90.9k	fe448_reduce(t1);
1101	90.9k	fe448_sub(x2, x2, z2);
1102	90.9k	fe448_sub(x3, x3, z3);
1103	90.9k	fe448_mul(t1, t1, x2);
1104	90.9k	fe448_mul(z3, x3, t0);
1105	90.9k	fe448_sqr(t0, t0);
1106	90.9k	fe448_sqr(x2, x2);
1107	90.9k	fe448_add(x3, z3, t1);
1108	90.9k	fe448_reduce(x3);
1109	90.9k	fe448_sqr(x3, x3);
1110	90.9k	fe448_sub(z3, z3, t1);
1111	90.9k	fe448_sqr(z3, z3);
1112	90.9k	fe448_mul(z3, z3, x1);
1113	90.9k	fe448_sub(t1, t0, x2);
1114	90.9k	fe448_mul(x2, t0, x2);
1115	90.9k	fe448_mul39081(z2, t1);
1116	90.9k	fe448_add(z2, t0, z2);
1117	90.9k	fe448_mul(z2, z2, t1);
1118	90.9k	}
1119		/* Last two bits are 0 - no final swap check required. */
1120
1121	203	fe448_invert(z2, z2);
1122	203	fe448_mul(x2, x2, z2);
1123	203	fe448_to_bytes(r, x2);
1124
1125	203	return 0;
1126	203	}
1127
1128		#ifdef HAVE_ED448
1129		/* Check whether field element is not 0.
1130		* Must convert to a normalized form before checking.
1131		*
1132		* a [in] Field element.
1133		* returns 0 when zero, and any other value otherwise.
1134		*/
1135		int fe448_isnonzero(const sword64* a)
1136	435	{
1137	435	byte b[56];
1138	435	int i;
1139	435	byte c = 0;
1140	435	fe448_to_bytes(b, a);
1141	24.7k	for (i = 0; i < 56; i++)
1142	24.3k	c \|= b[i];
1143	435	return c;
1144	435	}
1145
1146		/* Check whether field element is negative.
1147		* Must convert to a normalized form before checking.
1148		*
1149		* a [in] Field element.
1150		* returns 1 when negative, and 0 otherwise.
1151		*/
1152		int fe448_isnegative(const sword64* a)
1153	2.31k	{
1154	2.31k	byte b[56];
1155	2.31k	fe448_to_bytes(b, a);
1156	2.31k	return b[0] & 1;
1157	2.31k	}
1158
1159		/* Negates the field element. r = -a
1160		*
1161		* r [in] Field element to hold result.
1162		* a [in] Field element.
1163		*/
1164		void fe448_neg(sword64* r, const sword64* a)
1165	173k	{
1166	173k	r[0] = -a[0];
1167	173k	r[1] = -a[1];
1168	173k	r[2] = -a[2];
1169	173k	r[3] = -a[3];
1170	173k	r[4] = -a[4];
1171	173k	r[5] = -a[5];
1172	173k	r[6] = -a[6];
1173	173k	r[7] = -a[7];
1174	173k	}
1175
1176		/* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
1177		* Used for calcualting y-ordinate from x-ordinate for Ed448.
1178		*
1179		* r [in] Field element to hold result.
1180		* a [in] Field element to exponentiate.
1181		*/
1182		void fe448_pow_2_446_222_1(sword64* r, const sword64* a)
1183	435	{
1184	435	sword64 t1[8];
1185	435	sword64 t2[8];
1186	435	sword64 t3[8];
1187	435	sword64 t4[8];
1188	435	sword64 t5[8];
1189	435	int i;
1190
1191	435	fe448_sqr(t3, a);
1192		/* t3 = 2 */
1193	435	fe448_mul(t1, t3, a);
1194		/* t1 = 3 */
1195	435	fe448_sqr(t5, t1);
1196		/* t5 = 6 */
1197	435	fe448_mul(t5, t5, a);
1198		/* t5 = 7 */
1199	870	fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
1200		/* t2 = c */
1201	435	fe448_mul(t3, t2, t3);
1202		/* t3 = e */
1203	435	fe448_mul(t1, t2, t1);
1204		/* t1 = f */
1205	1.30k	fe448_sqr(t2, t1); for (i = 1; i < 3; ++i) fe448_sqr(t2, t2);
1206		/* t2 = 78 */
1207	435	fe448_mul(t5, t2, t5);
1208		/* t5 = 7f */
1209	1.74k	fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
1210		/* t2 = f0 */
1211	435	fe448_mul(t1, t2, t1);
1212		/* t1 = ff */
1213	435	fe448_mul(t3, t3, t2);
1214		/* t3 = fe */
1215	3.04k	fe448_sqr(t2, t1); for (i = 1; i < 7; ++i) fe448_sqr(t2, t2);
1216		/* t2 = 7f80 */
1217	435	fe448_mul(t5, t2, t5);
1218		/* t5 = 7fff */
1219	3.48k	fe448_sqr(t2, t1); for (i = 1; i < 8; ++i) fe448_sqr(t2, t2);
1220		/* t2 = ff00 */
1221	435	fe448_mul(t1, t2, t1);
1222		/* t1 = ffff */
1223	435	fe448_mul(t3, t3, t2);
1224		/* t3 = fffe */
1225	6.52k	fe448_sqr(t2, t5); for (i = 1; i < 15; ++i) fe448_sqr(t2, t2);
1226		/* t2 = 3fff8000 */
1227	435	fe448_mul(t5, t2, t5);
1228		/* t5 = 3fffffff */
1229	6.96k	fe448_sqr(t2, t1); for (i = 1; i < 16; ++i) fe448_sqr(t2, t2);
1230		/* t2 = ffff0000 */
1231	435	fe448_mul(t1, t2, t1);
1232		/* t1 = ffffffff */
1233	435	fe448_mul(t3, t3, t2);
1234		/* t3 = fffffffe */
1235	13.9k	fe448_sqr(t2, t1); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
1236		/* t2 = ffffffff00000000 */
1237	435	fe448_mul(t2, t2, t1);
1238		/* t2 = ffffffffffffffff */
1239	27.8k	fe448_sqr(t1, t2); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
1240		/* t1 = ffffffffffffffff0000000000000000 */
1241	435	fe448_mul(t1, t1, t2);
1242		/* t1 = ffffffffffffffffffffffffffffffff */
1243	27.8k	fe448_sqr(t1, t1); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
1244		/* t1 = ffffffffffffffffffffffffffffffff0000000000000000 */
1245	435	fe448_mul(t4, t1, t2);
1246		/* t4 = ffffffffffffffffffffffffffffffffffffffffffffffff */
1247	13.9k	fe448_sqr(t2, t4); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
1248		/* t2 = ffffffffffffffffffffffffffffffffffffffffffffffff00000000 */
1249	435	fe448_mul(t3, t3, t2);
1250		/* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
1251	83.5k	fe448_sqr(t1, t3); for (i = 1; i < 192; ++i) fe448_sqr(t1, t1);
1252		/* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe000000000000000000000000000000000000000000000000 */
1253	435	fe448_mul(t1, t1, t4);
1254		/* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffff */
1255	13.0k	fe448_sqr(t1, t1); for (i = 1; i < 30; ++i) fe448_sqr(t1, t1);
1256		/* t1 = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffc0000000 */
1257	435	fe448_mul(r, t5, t1);
1258		/* r = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff */
1259	435	}
1260
1261		/* Constant time, conditional move of b into a.
1262		* a is not changed if the condition is 0.
1263		*
1264		* a A field element.
1265		* b A field element.
1266		* c If 1 then copy and if 0 then don't copy.
1267		*/
1268		void fe448_cmov(sword64* a, const sword64* b, int c)
1269	2.92M	{
1270	2.92M	sword64 m = -(sword64)c;
1271	2.92M	sword64 t0 = m & (a[0] ^ b[0]);
1272	2.92M	sword64 t1 = m & (a[1] ^ b[1]);
1273	2.92M	sword64 t2 = m & (a[2] ^ b[2]);
1274	2.92M	sword64 t3 = m & (a[3] ^ b[3]);
1275	2.92M	sword64 t4 = m & (a[4] ^ b[4]);
1276	2.92M	sword64 t5 = m & (a[5] ^ b[5]);
1277	2.92M	sword64 t6 = m & (a[6] ^ b[6]);
1278	2.92M	sword64 t7 = m & (a[7] ^ b[7]);
1279
1280	2.92M	a[0] ^= t0;
1281	2.92M	a[1] ^= t1;
1282	2.92M	a[2] ^= t2;
1283	2.92M	a[3] ^= t3;
1284	2.92M	a[4] ^= t4;
1285	2.92M	a[5] ^= t5;
1286	2.92M	a[6] ^= t6;
1287	2.92M	a[7] ^= t7;
1288	2.92M	}
1289
1290		#endif /* HAVE_ED448 */
1291		#else
1292
1293		/* Initialize the field element operations.
1294		*/
1295		void fe448_init(void)
1296		{
1297		}
1298
1299		/* Convert the field element from a byte array to an array of 28-bits.
1300		*
1301		* r [in] Array to encode into.
1302		* b [in] Byte array.
1303		*/
1304		void fe448_from_bytes(sword32* r, const unsigned char* b)
1305		{
1306		r[ 0] = (((sword32)((b[ 0] ) >> 0)) << 0)
1307		\| (((sword32)((b[ 1] ) >> 0)) << 8)
1308		\| (((sword32)((b[ 2] ) >> 0)) << 16)
1309		\| ((((sword32)((b[ 3] & 0xf )) >> 0)) << 24);
1310		r[ 1] = (((sword32)((b[ 3] ) >> 4)) << 0)
1311		\| (((sword32)((b[ 4] ) >> 0)) << 4)
1312		\| (((sword32)((b[ 5] ) >> 0)) << 12)
1313		\| (((sword32)((b[ 6] ) >> 0)) << 20);
1314		r[ 2] = (((sword32)((b[ 7] ) >> 0)) << 0)
1315		\| (((sword32)((b[ 8] ) >> 0)) << 8)
1316		\| (((sword32)((b[ 9] ) >> 0)) << 16)
1317		\| ((((sword32)((b[10] & 0xf )) >> 0)) << 24);
1318		r[ 3] = (((sword32)((b[10] ) >> 4)) << 0)
1319		\| (((sword32)((b[11] ) >> 0)) << 4)
1320		\| (((sword32)((b[12] ) >> 0)) << 12)
1321		\| (((sword32)((b[13] ) >> 0)) << 20);
1322		r[ 4] = (((sword32)((b[14] ) >> 0)) << 0)
1323		\| (((sword32)((b[15] ) >> 0)) << 8)
1324		\| (((sword32)((b[16] ) >> 0)) << 16)
1325		\| ((((sword32)((b[17] & 0xf )) >> 0)) << 24);
1326		r[ 5] = (((sword32)((b[17] ) >> 4)) << 0)
1327		\| (((sword32)((b[18] ) >> 0)) << 4)
1328		\| (((sword32)((b[19] ) >> 0)) << 12)
1329		\| (((sword32)((b[20] ) >> 0)) << 20);
1330		r[ 6] = (((sword32)((b[21] ) >> 0)) << 0)
1331		\| (((sword32)((b[22] ) >> 0)) << 8)
1332		\| (((sword32)((b[23] ) >> 0)) << 16)
1333		\| ((((sword32)((b[24] & 0xf )) >> 0)) << 24);
1334		r[ 7] = (((sword32)((b[24] ) >> 4)) << 0)
1335		\| (((sword32)((b[25] ) >> 0)) << 4)
1336		\| (((sword32)((b[26] ) >> 0)) << 12)
1337		\| (((sword32)((b[27] ) >> 0)) << 20);
1338		r[ 8] = (((sword32)((b[28] ) >> 0)) << 0)
1339		\| (((sword32)((b[29] ) >> 0)) << 8)
1340		\| (((sword32)((b[30] ) >> 0)) << 16)
1341		\| ((((sword32)((b[31] & 0xf )) >> 0)) << 24);
1342		r[ 9] = (((sword32)((b[31] ) >> 4)) << 0)
1343		\| (((sword32)((b[32] ) >> 0)) << 4)
1344		\| (((sword32)((b[33] ) >> 0)) << 12)
1345		\| (((sword32)((b[34] ) >> 0)) << 20);
1346		r[10] = (((sword32)((b[35] ) >> 0)) << 0)
1347		\| (((sword32)((b[36] ) >> 0)) << 8)
1348		\| (((sword32)((b[37] ) >> 0)) << 16)
1349		\| ((((sword32)((b[38] & 0xf )) >> 0)) << 24);
1350		r[11] = (((sword32)((b[38] ) >> 4)) << 0)
1351		\| (((sword32)((b[39] ) >> 0)) << 4)
1352		\| (((sword32)((b[40] ) >> 0)) << 12)
1353		\| (((sword32)((b[41] ) >> 0)) << 20);
1354		r[12] = (((sword32)((b[42] ) >> 0)) << 0)
1355		\| (((sword32)((b[43] ) >> 0)) << 8)
1356		\| (((sword32)((b[44] ) >> 0)) << 16)
1357		\| ((((sword32)((b[45] & 0xf )) >> 0)) << 24);
1358		r[13] = (((sword32)((b[45] ) >> 4)) << 0)
1359		\| (((sword32)((b[46] ) >> 0)) << 4)
1360		\| (((sword32)((b[47] ) >> 0)) << 12)
1361		\| (((sword32)((b[48] ) >> 0)) << 20);
1362		r[14] = (((sword32)((b[49] ) >> 0)) << 0)
1363		\| (((sword32)((b[50] ) >> 0)) << 8)
1364		\| (((sword32)((b[51] ) >> 0)) << 16)
1365		\| ((((sword32)((b[52] & 0xf )) >> 0)) << 24);
1366		r[15] = (((sword32)((b[52] ) >> 4)) << 0)
1367		\| (((sword32)((b[53] ) >> 0)) << 4)
1368		\| (((sword32)((b[54] ) >> 0)) << 12)
1369		\| (((sword32)((b[55] ) >> 0)) << 20);
1370		}
1371
1372		/* Convert the field element to a byte array from an array of 28-bits.
1373		*
1374		* b [in] Byte array.
1375		* a [in] Array to encode into.
1376		*/
1377		void fe448_to_bytes(unsigned char* b, const sword32* a)
1378		{
1379		sword64 t;
1380		/* Mod */
1381		sword32 in0 = a[0];
1382		sword32 in1 = a[1];
1383		sword32 in2 = a[2];
1384		sword32 in3 = a[3];
1385		sword32 in4 = a[4];
1386		sword32 in5 = a[5];
1387		sword32 in6 = a[6];
1388		sword32 in7 = a[7];
1389		sword32 in8 = a[8];
1390		sword32 in9 = a[9];
1391		sword32 in10 = a[10];
1392		sword32 in11 = a[11];
1393		sword32 in12 = a[12];
1394		sword32 in13 = a[13];
1395		sword32 in14 = a[14];
1396		sword32 in15 = a[15];
1397		sword32 o = in15 >> 28;
1398		in15 -= o << 28;
1399		in0 += o;
1400		in8 += o;
1401		o = (in0 + 1) >> 28;
1402		o = (o + in1) >> 28;
1403		o = (o + in2) >> 28;
1404		o = (o + in3) >> 28;
1405		o = (o + in4) >> 28;
1406		o = (o + in5) >> 28;
1407		o = (o + in6) >> 28;
1408		o = (o + in7) >> 28;
1409		o = (o + in8 + 1) >> 28;
1410		o = (o + in9) >> 28;
1411		o = (o + in10) >> 28;
1412		o = (o + in11) >> 28;
1413		o = (o + in12) >> 28;
1414		o = (o + in13) >> 28;
1415		o = (o + in14) >> 28;
1416		o = (o + in15) >> 28;
1417		in0 += o;
1418		in8 += o;
1419		in15 -= o << 28;
1420		o = (in0 >> 28); in1 += o; t = o << 28; in0 -= (sword32)t;
1421		o = (in1 >> 28); in2 += o; t = o << 28; in1 -= (sword32)t;
1422		o = (in2 >> 28); in3 += o; t = o << 28; in2 -= (sword32)t;
1423		o = (in3 >> 28); in4 += o; t = o << 28; in3 -= (sword32)t;
1424		o = (in4 >> 28); in5 += o; t = o << 28; in4 -= (sword32)t;
1425		o = (in5 >> 28); in6 += o; t = o << 28; in5 -= (sword32)t;
1426		o = (in6 >> 28); in7 += o; t = o << 28; in6 -= (sword32)t;
1427		o = (in7 >> 28); in8 += o; t = o << 28; in7 -= (sword32)t;
1428		o = (in8 >> 28); in9 += o; t = o << 28; in8 -= (sword32)t;
1429		o = (in9 >> 28); in10 += o; t = o << 28; in9 -= (sword32)t;
1430		o = (in10 >> 28); in11 += o; t = o << 28; in10 -= (sword32)t;
1431		o = (in11 >> 28); in12 += o; t = o << 28; in11 -= (sword32)t;
1432		o = (in12 >> 28); in13 += o; t = o << 28; in12 -= (sword32)t;
1433		o = (in13 >> 28); in14 += o; t = o << 28; in13 -= (sword32)t;
1434		o = (in14 >> 28); in15 += o; t = o << 28; in14 -= (sword32)t;
1435		o = (in15 >> 28); in0 += o;
1436		in8 += o; t = o << 28; in15 -= (sword32)t;
1437
1438		/* Output as bytes */
1439		b[ 0] = (in0 >> 0);
1440		b[ 1] = (in0 >> 8);
1441		b[ 2] = (in0 >> 16);
1442		b[ 3] = (in0 >> 24) + ((in1 >> 0) << 4);
1443		b[ 4] = (in1 >> 4);
1444		b[ 5] = (in1 >> 12);
1445		b[ 6] = (in1 >> 20);
1446		b[ 7] = (in2 >> 0);
1447		b[ 8] = (in2 >> 8);
1448		b[ 9] = (in2 >> 16);
1449		b[10] = (in2 >> 24) + ((in3 >> 0) << 4);
1450		b[11] = (in3 >> 4);
1451		b[12] = (in3 >> 12);
1452		b[13] = (in3 >> 20);
1453		b[14] = (in4 >> 0);
1454		b[15] = (in4 >> 8);
1455		b[16] = (in4 >> 16);
1456		b[17] = (in4 >> 24) + ((in5 >> 0) << 4);
1457		b[18] = (in5 >> 4);
1458		b[19] = (in5 >> 12);
1459		b[20] = (in5 >> 20);
1460		b[21] = (in6 >> 0);
1461		b[22] = (in6 >> 8);
1462		b[23] = (in6 >> 16);
1463		b[24] = (in6 >> 24) + ((in7 >> 0) << 4);
1464		b[25] = (in7 >> 4);
1465		b[26] = (in7 >> 12);
1466		b[27] = (in7 >> 20);
1467		b[28] = (in8 >> 0);
1468		b[29] = (in8 >> 8);
1469		b[30] = (in8 >> 16);
1470		b[31] = (in8 >> 24) + ((in9 >> 0) << 4);
1471		b[32] = (in9 >> 4);
1472		b[33] = (in9 >> 12);
1473		b[34] = (in9 >> 20);
1474		b[35] = (in10 >> 0);
1475		b[36] = (in10 >> 8);
1476		b[37] = (in10 >> 16);
1477		b[38] = (in10 >> 24) + ((in11 >> 0) << 4);
1478		b[39] = (in11 >> 4);
1479		b[40] = (in11 >> 12);
1480		b[41] = (in11 >> 20);
1481		b[42] = (in12 >> 0);
1482		b[43] = (in12 >> 8);
1483		b[44] = (in12 >> 16);
1484		b[45] = (in12 >> 24) + ((in13 >> 0) << 4);
1485		b[46] = (in13 >> 4);
1486		b[47] = (in13 >> 12);
1487		b[48] = (in13 >> 20);
1488		b[49] = (in14 >> 0);
1489		b[50] = (in14 >> 8);
1490		b[51] = (in14 >> 16);
1491		b[52] = (in14 >> 24) + ((in15 >> 0) << 4);
1492		b[53] = (in15 >> 4);
1493		b[54] = (in15 >> 12);
1494		b[55] = (in15 >> 20);
1495		}
1496
1497		/* Set the field element to 0.
1498		*
1499		* a [in] Field element.
1500		*/
1501		void fe448_1(sword32* a)
1502		{
1503		a[0] = 1;
1504		a[1] = 0;
1505		a[2] = 0;
1506		a[3] = 0;
1507		a[4] = 0;
1508		a[5] = 0;
1509		a[6] = 0;
1510		a[7] = 0;
1511		a[8] = 0;
1512		a[9] = 0;
1513		a[10] = 0;
1514		a[11] = 0;
1515		a[12] = 0;
1516		a[13] = 0;
1517		a[14] = 0;
1518		a[15] = 0;
1519		}
1520
1521		/* Set the field element to 0.
1522		*
1523		* a [in] Field element.
1524		*/
1525		void fe448_0(sword32* a)
1526		{
1527		a[0] = 0;
1528		a[1] = 0;
1529		a[2] = 0;
1530		a[3] = 0;
1531		a[4] = 0;
1532		a[5] = 0;
1533		a[6] = 0;
1534		a[7] = 0;
1535		a[8] = 0;
1536		a[9] = 0;
1537		a[10] = 0;
1538		a[11] = 0;
1539		a[12] = 0;
1540		a[13] = 0;
1541		a[14] = 0;
1542		a[15] = 0;
1543		}
1544
1545		/* Copy one field element into another: d = a.
1546		*
1547		* d [in] Destination field element.
1548		* a [in] Source field element.
1549		*/
1550		void fe448_copy(sword32* d, const sword32* a)
1551		{
1552		d[0] = a[0];
1553		d[1] = a[1];
1554		d[2] = a[2];
1555		d[3] = a[3];
1556		d[4] = a[4];
1557		d[5] = a[5];
1558		d[6] = a[6];
1559		d[7] = a[7];
1560		d[8] = a[8];
1561		d[9] = a[9];
1562		d[10] = a[10];
1563		d[11] = a[11];
1564		d[12] = a[12];
1565		d[13] = a[13];
1566		d[14] = a[14];
1567		d[15] = a[15];
1568		}
1569
1570		/* Conditionally swap the elements.
1571		* Constant time implementation.
1572		*
1573		* a [in] First field element.
1574		* b [in] Second field element.
1575		* c [in] Swap when 1. Valid values: 0, 1.
1576		*/
1577		static void fe448_cswap(sword32* a, sword32* b, int c)
1578		{
1579		sword32 mask = -(sword32)c;
1580		sword32 t0 = (a[0] ^ b[0]) & mask;
1581		sword32 t1 = (a[1] ^ b[1]) & mask;
1582		sword32 t2 = (a[2] ^ b[2]) & mask;
1583		sword32 t3 = (a[3] ^ b[3]) & mask;
1584		sword32 t4 = (a[4] ^ b[4]) & mask;
1585		sword32 t5 = (a[5] ^ b[5]) & mask;
1586		sword32 t6 = (a[6] ^ b[6]) & mask;
1587		sword32 t7 = (a[7] ^ b[7]) & mask;
1588		sword32 t8 = (a[8] ^ b[8]) & mask;
1589		sword32 t9 = (a[9] ^ b[9]) & mask;
1590		sword32 t10 = (a[10] ^ b[10]) & mask;
1591		sword32 t11 = (a[11] ^ b[11]) & mask;
1592		sword32 t12 = (a[12] ^ b[12]) & mask;
1593		sword32 t13 = (a[13] ^ b[13]) & mask;
1594		sword32 t14 = (a[14] ^ b[14]) & mask;
1595		sword32 t15 = (a[15] ^ b[15]) & mask;
1596		a[0] ^= t0;
1597		a[1] ^= t1;
1598		a[2] ^= t2;
1599		a[3] ^= t3;
1600		a[4] ^= t4;
1601		a[5] ^= t5;
1602		a[6] ^= t6;
1603		a[7] ^= t7;
1604		a[8] ^= t8;
1605		a[9] ^= t9;
1606		a[10] ^= t10;
1607		a[11] ^= t11;
1608		a[12] ^= t12;
1609		a[13] ^= t13;
1610		a[14] ^= t14;
1611		a[15] ^= t15;
1612		b[0] ^= t0;
1613		b[1] ^= t1;
1614		b[2] ^= t2;
1615		b[3] ^= t3;
1616		b[4] ^= t4;
1617		b[5] ^= t5;
1618		b[6] ^= t6;
1619		b[7] ^= t7;
1620		b[8] ^= t8;
1621		b[9] ^= t9;
1622		b[10] ^= t10;
1623		b[11] ^= t11;
1624		b[12] ^= t12;
1625		b[13] ^= t13;
1626		b[14] ^= t14;
1627		b[15] ^= t15;
1628		}
1629
1630		/* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
1631		*
1632		* r [in] Field element to hold sum.
1633		* a [in] Field element to add.
1634		* b [in] Field element to add.
1635		*/
1636		void fe448_add(sword32* r, const sword32* a, const sword32* b)
1637		{
1638		r[0] = a[0] + b[0];
1639		r[1] = a[1] + b[1];
1640		r[2] = a[2] + b[2];
1641		r[3] = a[3] + b[3];
1642		r[4] = a[4] + b[4];
1643		r[5] = a[5] + b[5];
1644		r[6] = a[6] + b[6];
1645		r[7] = a[7] + b[7];
1646		r[8] = a[8] + b[8];
1647		r[9] = a[9] + b[9];
1648		r[10] = a[10] + b[10];
1649		r[11] = a[11] + b[11];
1650		r[12] = a[12] + b[12];
1651		r[13] = a[13] + b[13];
1652		r[14] = a[14] + b[14];
1653		r[15] = a[15] + b[15];
1654		}
1655
1656		/* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
1657		*
1658		* r [in] Field element to hold difference.
1659		* a [in] Field element to subtract from.
1660		* b [in] Field element to subtract.
1661		*/
1662		void fe448_sub(sword32* r, const sword32* a, const sword32* b)
1663		{
1664		r[0] = a[0] - b[0];
1665		r[1] = a[1] - b[1];
1666		r[2] = a[2] - b[2];
1667		r[3] = a[3] - b[3];
1668		r[4] = a[4] - b[4];
1669		r[5] = a[5] - b[5];
1670		r[6] = a[6] - b[6];
1671		r[7] = a[7] - b[7];
1672		r[8] = a[8] - b[8];
1673		r[9] = a[9] - b[9];
1674		r[10] = a[10] - b[10];
1675		r[11] = a[11] - b[11];
1676		r[12] = a[12] - b[12];
1677		r[13] = a[13] - b[13];
1678		r[14] = a[14] - b[14];
1679		r[15] = a[15] - b[15];
1680		}
1681
1682		void fe448_reduce(sword32* a)
1683		{
1684		sword64 o;
1685
1686		o = a[0 ] >> 28; a[1 ] += (sword32)o; a[0 ] -= (sword32)(o << 28);
1687		o = a[1 ] >> 28; a[2 ] += (sword32)o; a[1 ] -= (sword32)(o << 28);
1688		o = a[2 ] >> 28; a[3 ] += (sword32)o; a[2 ] -= (sword32)(o << 28);
1689		o = a[3 ] >> 28; a[4 ] += (sword32)o; a[3 ] -= (sword32)(o << 28);
1690		o = a[4 ] >> 28; a[5 ] += (sword32)o; a[4 ] -= (sword32)(o << 28);
1691		o = a[5 ] >> 28; a[6 ] += (sword32)o; a[5 ] -= (sword32)(o << 28);
1692		o = a[6 ] >> 28; a[7 ] += (sword32)o; a[6 ] -= (sword32)(o << 28);
1693		o = a[7 ] >> 28; a[8 ] += (sword32)o; a[7 ] -= (sword32)(o << 28);
1694		o = a[8 ] >> 28; a[9 ] += (sword32)o; a[8 ] -= (sword32)(o << 28);
1695		o = a[9 ] >> 28; a[10] += (sword32)o; a[9 ] -= (sword32)(o << 28);
1696		o = a[10] >> 28; a[11] += (sword32)o; a[10] -= (sword32)(o << 28);
1697		o = a[11] >> 28; a[12] += (sword32)o; a[11] -= (sword32)(o << 28);
1698		o = a[12] >> 28; a[13] += (sword32)o; a[12] -= (sword32)(o << 28);
1699		o = a[13] >> 28; a[14] += (sword32)o; a[13] -= (sword32)(o << 28);
1700		o = a[14] >> 28; a[15] += (sword32)o; a[14] -= (sword32)(o << 28);
1701		o = a[15] >> 28; a[0] += (sword32)o;
1702		a[8] += (sword32)o; a[15] -= (sword32)(o << 28);
1703		}
1704		/* Mulitply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
1705		*
1706		* r [in] Field element to hold result.
1707		* a [in] Field element to multiply.
1708		*/
1709		void fe448_mul39081(sword32* r, const sword32* a)
1710		{
1711		sword64 t;
1712		sword32 o;
1713		sword64 t0 = a[0] * (sword64)39081;
1714		sword64 t1 = a[1] * (sword64)39081;
1715		sword64 t2 = a[2] * (sword64)39081;
1716		sword64 t3 = a[3] * (sword64)39081;
1717		sword64 t4 = a[4] * (sword64)39081;
1718		sword64 t5 = a[5] * (sword64)39081;
1719		sword64 t6 = a[6] * (sword64)39081;
1720		sword64 t7 = a[7] * (sword64)39081;
1721		sword64 t8 = a[8] * (sword64)39081;
1722		sword64 t9 = a[9] * (sword64)39081;
1723		sword64 t10 = a[10] * (sword64)39081;
1724		sword64 t11 = a[11] * (sword64)39081;
1725		sword64 t12 = a[12] * (sword64)39081;
1726		sword64 t13 = a[13] * (sword64)39081;
1727		sword64 t14 = a[14] * (sword64)39081;
1728		sword64 t15 = a[15] * (sword64)39081;
1729		o = (sword32)(t0 >> 28); t1 += o; t = (sword64)o << 28; t0 -= t;
1730		o = (sword32)(t1 >> 28); t2 += o; t = (sword64)o << 28; t1 -= t;
1731		o = (sword32)(t2 >> 28); t3 += o; t = (sword64)o << 28; t2 -= t;
1732		o = (sword32)(t3 >> 28); t4 += o; t = (sword64)o << 28; t3 -= t;
1733		o = (sword32)(t4 >> 28); t5 += o; t = (sword64)o << 28; t4 -= t;
1734		o = (sword32)(t5 >> 28); t6 += o; t = (sword64)o << 28; t5 -= t;
1735		o = (sword32)(t6 >> 28); t7 += o; t = (sword64)o << 28; t6 -= t;
1736		o = (sword32)(t7 >> 28); t8 += o; t = (sword64)o << 28; t7 -= t;
1737		o = (sword32)(t8 >> 28); t9 += o; t = (sword64)o << 28; t8 -= t;
1738		o = (sword32)(t9 >> 28); t10 += o; t = (sword64)o << 28; t9 -= t;
1739		o = (sword32)(t10 >> 28); t11 += o; t = (sword64)o << 28; t10 -= t;
1740		o = (sword32)(t11 >> 28); t12 += o; t = (sword64)o << 28; t11 -= t;
1741		o = (sword32)(t12 >> 28); t13 += o; t = (sword64)o << 28; t12 -= t;
1742		o = (sword32)(t13 >> 28); t14 += o; t = (sword64)o << 28; t13 -= t;
1743		o = (sword32)(t14 >> 28); t15 += o; t = (sword64)o << 28; t14 -= t;
1744		o = (sword32)(t15 >> 28); t0 += o;
1745		t8 += o; t = (sword64)o << 28; t15 -= t;
1746
1747		/* Store */
1748		r[0] = (sword32)t0;
1749		r[1] = (sword32)t1;
1750		r[2] = (sword32)t2;
1751		r[3] = (sword32)t3;
1752		r[4] = (sword32)t4;
1753		r[5] = (sword32)t5;
1754		r[6] = (sword32)t6;
1755		r[7] = (sword32)t7;
1756		r[8] = (sword32)t8;
1757		r[9] = (sword32)t9;
1758		r[10] = (sword32)t10;
1759		r[11] = (sword32)t11;
1760		r[12] = (sword32)t12;
1761		r[13] = (sword32)t13;
1762		r[14] = (sword32)t14;
1763		r[15] = (sword32)t15;
1764		}
1765
1766		/* Mulitply two field elements. r = a * b
1767		*
1768		* r [in] Field element to hold result.
1769		* a [in] Field element to multiply.
1770		* b [in] Field element to multiply.
1771		*/
1772		static WC_INLINE void fe448_mul_8(sword32* r, const sword32* a, const sword32* b)
1773		{
1774		sword64 t;
1775		sword64 t0 = (sword64)a[ 0] * b[ 0];
1776		sword64 t1 = (sword64)a[ 0] * b[ 1];
1777		sword64 t101 = (sword64)a[ 1] * b[ 0];
1778		sword64 t2 = (sword64)a[ 0] * b[ 2];
1779		sword64 t102 = (sword64)a[ 1] * b[ 1];
1780		sword64 t202 = (sword64)a[ 2] * b[ 0];
1781		sword64 t3 = (sword64)a[ 0] * b[ 3];
1782		sword64 t103 = (sword64)a[ 1] * b[ 2];
1783		sword64 t203 = (sword64)a[ 2] * b[ 1];
1784		sword64 t303 = (sword64)a[ 3] * b[ 0];
1785		sword64 t4 = (sword64)a[ 0] * b[ 4];
1786		sword64 t104 = (sword64)a[ 1] * b[ 3];
1787		sword64 t204 = (sword64)a[ 2] * b[ 2];
1788		sword64 t304 = (sword64)a[ 3] * b[ 1];
1789		sword64 t404 = (sword64)a[ 4] * b[ 0];
1790		sword64 t5 = (sword64)a[ 0] * b[ 5];
1791		sword64 t105 = (sword64)a[ 1] * b[ 4];
1792		sword64 t205 = (sword64)a[ 2] * b[ 3];
1793		sword64 t305 = (sword64)a[ 3] * b[ 2];
1794		sword64 t405 = (sword64)a[ 4] * b[ 1];
1795		sword64 t505 = (sword64)a[ 5] * b[ 0];
1796		sword64 t6 = (sword64)a[ 0] * b[ 6];
1797		sword64 t106 = (sword64)a[ 1] * b[ 5];
1798		sword64 t206 = (sword64)a[ 2] * b[ 4];
1799		sword64 t306 = (sword64)a[ 3] * b[ 3];
1800		sword64 t406 = (sword64)a[ 4] * b[ 2];
1801		sword64 t506 = (sword64)a[ 5] * b[ 1];
1802		sword64 t606 = (sword64)a[ 6] * b[ 0];
1803		sword64 t7 = (sword64)a[ 0] * b[ 7];
1804		sword64 t107 = (sword64)a[ 1] * b[ 6];
1805		sword64 t207 = (sword64)a[ 2] * b[ 5];
1806		sword64 t307 = (sword64)a[ 3] * b[ 4];
1807		sword64 t407 = (sword64)a[ 4] * b[ 3];
1808		sword64 t507 = (sword64)a[ 5] * b[ 2];
1809		sword64 t607 = (sword64)a[ 6] * b[ 1];
1810		sword64 t707 = (sword64)a[ 7] * b[ 0];
1811		sword64 t8 = (sword64)a[ 1] * b[ 7];
1812		sword64 t108 = (sword64)a[ 2] * b[ 6];
1813		sword64 t208 = (sword64)a[ 3] * b[ 5];
1814		sword64 t308 = (sword64)a[ 4] * b[ 4];
1815		sword64 t408 = (sword64)a[ 5] * b[ 3];
1816		sword64 t508 = (sword64)a[ 6] * b[ 2];
1817		sword64 t608 = (sword64)a[ 7] * b[ 1];
1818		sword64 t9 = (sword64)a[ 2] * b[ 7];
1819		sword64 t109 = (sword64)a[ 3] * b[ 6];
1820		sword64 t209 = (sword64)a[ 4] * b[ 5];
1821		sword64 t309 = (sword64)a[ 5] * b[ 4];
1822		sword64 t409 = (sword64)a[ 6] * b[ 3];
1823		sword64 t509 = (sword64)a[ 7] * b[ 2];
1824		sword64 t10 = (sword64)a[ 3] * b[ 7];
1825		sword64 t110 = (sword64)a[ 4] * b[ 6];
1826		sword64 t210 = (sword64)a[ 5] * b[ 5];
1827		sword64 t310 = (sword64)a[ 6] * b[ 4];
1828		sword64 t410 = (sword64)a[ 7] * b[ 3];
1829		sword64 t11 = (sword64)a[ 4] * b[ 7];
1830		sword64 t111 = (sword64)a[ 5] * b[ 6];
1831		sword64 t211 = (sword64)a[ 6] * b[ 5];
1832		sword64 t311 = (sword64)a[ 7] * b[ 4];
1833		sword64 t12 = (sword64)a[ 5] * b[ 7];
1834		sword64 t112 = (sword64)a[ 6] * b[ 6];
1835		sword64 t212 = (sword64)a[ 7] * b[ 5];
1836		sword64 t13 = (sword64)a[ 6] * b[ 7];
1837		sword64 t113 = (sword64)a[ 7] * b[ 6];
1838		sword64 t14 = (sword64)a[ 7] * b[ 7];
1839		t1 += t101;
1840		t2 += t102; t2 += t202;
1841		t3 += t103; t3 += t203; t3 += t303;
1842		t4 += t104; t4 += t204; t4 += t304; t4 += t404;
1843		t5 += t105; t5 += t205; t5 += t305; t5 += t405; t5 += t505;
1844		t6 += t106; t6 += t206; t6 += t306; t6 += t406; t6 += t506;
1845		t6 += t606;
1846		t7 += t107; t7 += t207; t7 += t307; t7 += t407; t7 += t507;
1847		t7 += t607;
1848		t7 += t707;
1849		t8 += t108; t8 += t208; t8 += t308; t8 += t408; t8 += t508;
1850		t8 += t608;
1851		t9 += t109; t9 += t209; t9 += t309; t9 += t409; t9 += t509;
1852		t10 += t110; t10 += t210; t10 += t310; t10 += t410;
1853		t11 += t111; t11 += t211; t11 += t311;
1854		t12 += t112; t12 += t212;
1855		t13 += t113;
1856		sword64 o = t14 >> 28;
1857		sword64 t15 = o;
1858		t14 -= o << 28;
1859		o = (t0 >> 28); t1 += o; t = o << 28; t0 -= t;
1860		o = (t1 >> 28); t2 += o; t = o << 28; t1 -= t;
1861		o = (t2 >> 28); t3 += o; t = o << 28; t2 -= t;
1862		o = (t3 >> 28); t4 += o; t = o << 28; t3 -= t;
1863		o = (t4 >> 28); t5 += o; t = o << 28; t4 -= t;
1864		o = (t5 >> 28); t6 += o; t = o << 28; t5 -= t;
1865		o = (t6 >> 28); t7 += o; t = o << 28; t6 -= t;
1866		o = (t7 >> 28); t8 += o; t = o << 28; t7 -= t;
1867		o = (t8 >> 28); t9 += o; t = o << 28; t8 -= t;
1868		o = (t9 >> 28); t10 += o; t = o << 28; t9 -= t;
1869		o = (t10 >> 28); t11 += o; t = o << 28; t10 -= t;
1870		o = (t11 >> 28); t12 += o; t = o << 28; t11 -= t;
1871		o = (t12 >> 28); t13 += o; t = o << 28; t12 -= t;
1872		o = (t13 >> 28); t14 += o; t = o << 28; t13 -= t;
1873		o = (t14 >> 28); t15 += o; t = o << 28; t14 -= t;
1874		o = (t15 >> 28); t0 += o;
1875		t8 += o; t = o << 28; t15 -= t;
1876
1877		/* Store */
1878		r[0] = (sword32)t0;
1879		r[1] = (sword32)t1;
1880		r[2] = (sword32)t2;
1881		r[3] = (sword32)t3;
1882		r[4] = (sword32)t4;
1883		r[5] = (sword32)t5;
1884		r[6] = (sword32)t6;
1885		r[7] = (sword32)t7;
1886		r[8] = (sword32)t8;
1887		r[9] = (sword32)t9;
1888		r[10] = (sword32)t10;
1889		r[11] = (sword32)t11;
1890		r[12] = (sword32)t12;
1891		r[13] = (sword32)t13;
1892		r[14] = (sword32)t14;
1893		r[15] = (sword32)t15;
1894		}
1895
1896		/* Mulitply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
1897		*
1898		* r [in] Field element to hold result.
1899		* a [in] Field element to multiply.
1900		* b [in] Field element to multiply.
1901		*/
1902		void fe448_mul(sword32* r, const sword32* a, const sword32* b)
1903		{
1904		sword32 r0[16];
1905		sword32 r1[16];
1906		sword32* a1 = r1;
1907		sword32 b1[8];
1908		sword32 r2[16];
1909		a1[0] = a[0] + a[8];
1910		a1[1] = a[1] + a[9];
1911		a1[2] = a[2] + a[10];
1912		a1[3] = a[3] + a[11];
1913		a1[4] = a[4] + a[12];
1914		a1[5] = a[5] + a[13];
1915		a1[6] = a[6] + a[14];
1916		a1[7] = a[7] + a[15];
1917		b1[0] = b[0] + b[8];
1918		b1[1] = b[1] + b[9];
1919		b1[2] = b[2] + b[10];
1920		b1[3] = b[3] + b[11];
1921		b1[4] = b[4] + b[12];
1922		b1[5] = b[5] + b[13];
1923		b1[6] = b[6] + b[14];
1924		b1[7] = b[7] + b[15];
1925		fe448_mul_8(r2, a + 8, b + 8);
1926		fe448_mul_8(r0, a, b);
1927		fe448_mul_8(r1, a1, b1);
1928		r[ 0] = r0[ 0] + r2[ 0] + r1[ 8] - r0[ 8];
1929		r[ 1] = r0[ 1] + r2[ 1] + r1[ 9] - r0[ 9];
1930		r[ 2] = r0[ 2] + r2[ 2] + r1[10] - r0[10];
1931		r[ 3] = r0[ 3] + r2[ 3] + r1[11] - r0[11];
1932		r[ 4] = r0[ 4] + r2[ 4] + r1[12] - r0[12];
1933		r[ 5] = r0[ 5] + r2[ 5] + r1[13] - r0[13];
1934		r[ 6] = r0[ 6] + r2[ 6] + r1[14] - r0[14];
1935		r[ 7] = r0[ 7] + r2[ 7] + r1[15] - r0[15];
1936		r[ 8] = r2[ 8] + r1[ 0] - r0[ 0] + r1[ 8];
1937		r[ 9] = r2[ 9] + r1[ 1] - r0[ 1] + r1[ 9];
1938		r[10] = r2[10] + r1[ 2] - r0[ 2] + r1[10];
1939		r[11] = r2[11] + r1[ 3] - r0[ 3] + r1[11];
1940		r[12] = r2[12] + r1[ 4] - r0[ 4] + r1[12];
1941		r[13] = r2[13] + r1[ 5] - r0[ 5] + r1[13];
1942		r[14] = r2[14] + r1[ 6] - r0[ 6] + r1[14];
1943		r[15] = r2[15] + r1[ 7] - r0[ 7] + r1[15];
1944		}
1945
1946		/* Square a field element. r = a * a
1947		*
1948		* r [in] Field element to hold result.
1949		* a [in] Field element to square.
1950		*/
1951		static WC_INLINE void fe448_sqr_8(sword32* r, const sword32* a)
1952		{
1953		sword64 o;
1954		sword64 t15;
1955		sword64 t;
1956		sword64 t0 = (sword64)a[ 0] * a[ 0];
1957		sword64 t1 = 2 * (sword64)a[ 0] * a[ 1];
1958		sword64 t2 = 2 * (sword64)a[ 0] * a[ 2];
1959		sword64 t102 = (sword64)a[ 1] * a[ 1];
1960		sword64 t3 = 2 * (sword64)a[ 0] * a[ 3];
1961		sword64 t103 = 2 * (sword64)a[ 1] * a[ 2];
1962		sword64 t4 = 2 * (sword64)a[ 0] * a[ 4];
1963		sword64 t104 = 2 * (sword64)a[ 1] * a[ 3];
1964		sword64 t204 = (sword64)a[ 2] * a[ 2];
1965		sword64 t5 = 2 * (sword64)a[ 0] * a[ 5];
1966		sword64 t105 = 2 * (sword64)a[ 1] * a[ 4];
1967		sword64 t205 = 2 * (sword64)a[ 2] * a[ 3];
1968		sword64 t6 = 2 * (sword64)a[ 0] * a[ 6];
1969		sword64 t106 = 2 * (sword64)a[ 1] * a[ 5];
1970		sword64 t206 = 2 * (sword64)a[ 2] * a[ 4];
1971		sword64 t306 = (sword64)a[ 3] * a[ 3];
1972		sword64 t7 = 2 * (sword64)a[ 0] * a[ 7];
1973		sword64 t107 = 2 * (sword64)a[ 1] * a[ 6];
1974		sword64 t207 = 2 * (sword64)a[ 2] * a[ 5];
1975		sword64 t307 = 2 * (sword64)a[ 3] * a[ 4];
1976		sword64 t8 = 2 * (sword64)a[ 1] * a[ 7];
1977		sword64 t108 = 2 * (sword64)a[ 2] * a[ 6];
1978		sword64 t208 = 2 * (sword64)a[ 3] * a[ 5];
1979		sword64 t308 = (sword64)a[ 4] * a[ 4];
1980		sword64 t9 = 2 * (sword64)a[ 2] * a[ 7];
1981		sword64 t109 = 2 * (sword64)a[ 3] * a[ 6];
1982		sword64 t209 = 2 * (sword64)a[ 4] * a[ 5];
1983		sword64 t10 = 2 * (sword64)a[ 3] * a[ 7];
1984		sword64 t110 = 2 * (sword64)a[ 4] * a[ 6];
1985		sword64 t210 = (sword64)a[ 5] * a[ 5];
1986		sword64 t11 = 2 * (sword64)a[ 4] * a[ 7];
1987		sword64 t111 = 2 * (sword64)a[ 5] * a[ 6];
1988		sword64 t12 = 2 * (sword64)a[ 5] * a[ 7];
1989		sword64 t112 = (sword64)a[ 6] * a[ 6];
1990		sword64 t13 = 2 * (sword64)a[ 6] * a[ 7];
1991		sword64 t14 = (sword64)a[ 7] * a[ 7];
1992		t2 += t102;
1993		t3 += t103;
1994		t4 += t104; t4 += t204;
1995		t5 += t105; t5 += t205;
1996		t6 += t106; t6 += t206; t6 += t306;
1997		t7 += t107; t7 += t207; t7 += t307;
1998		t8 += t108; t8 += t208; t8 += t308;
1999		t9 += t109; t9 += t209;
2000		t10 += t110; t10 += t210;
2001		t11 += t111;
2002		t12 += t112;
2003		o = t14 >> 28;
2004		t15 = o;
2005		t14 -= o << 28;
2006		o = (t0 >> 28); t1 += o; t = o << 28; t0 -= t;
2007		o = (t1 >> 28); t2 += o; t = o << 28; t1 -= t;
2008		o = (t2 >> 28); t3 += o; t = o << 28; t2 -= t;
2009		o = (t3 >> 28); t4 += o; t = o << 28; t3 -= t;
2010		o = (t4 >> 28); t5 += o; t = o << 28; t4 -= t;
2011		o = (t5 >> 28); t6 += o; t = o << 28; t5 -= t;
2012		o = (t6 >> 28); t7 += o; t = o << 28; t6 -= t;
2013		o = (t7 >> 28); t8 += o; t = o << 28; t7 -= t;
2014		o = (t8 >> 28); t9 += o; t = o << 28; t8 -= t;
2015		o = (t9 >> 28); t10 += o; t = o << 28; t9 -= t;
2016		o = (t10 >> 28); t11 += o; t = o << 28; t10 -= t;
2017		o = (t11 >> 28); t12 += o; t = o << 28; t11 -= t;
2018		o = (t12 >> 28); t13 += o; t = o << 28; t12 -= t;
2019		o = (t13 >> 28); t14 += o; t = o << 28; t13 -= t;
2020		o = (t14 >> 28); t15 += o; t = o << 28; t14 -= t;
2021		o = (t15 >> 28); t0 += o;
2022		t8 += o; t = o << 28; t15 -= t;
2023
2024		/* Store */
2025		r[0] = (sword32)t0;
2026		r[1] = (sword32)t1;
2027		r[2] = (sword32)t2;
2028		r[3] = (sword32)t3;
2029		r[4] = (sword32)t4;
2030		r[5] = (sword32)t5;
2031		r[6] = (sword32)t6;
2032		r[7] = (sword32)t7;
2033		r[8] = (sword32)t8;
2034		r[9] = (sword32)t9;
2035		r[10] = (sword32)t10;
2036		r[11] = (sword32)t11;
2037		r[12] = (sword32)t12;
2038		r[13] = (sword32)t13;
2039		r[14] = (sword32)t14;
2040		r[15] = (sword32)t15;
2041		}
2042
2043		/* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
2044		*
2045		* r [in] Field element to hold result.
2046		* a [in] Field element to square.
2047		*/
2048		void fe448_sqr(sword32* r, const sword32* a)
2049		{
2050		sword32 r0[16];
2051		sword32 r1[16];
2052		sword32* a1 = r1;
2053		sword32 r2[16];
2054		a1[0] = a[0] + a[8];
2055		a1[1] = a[1] + a[9];
2056		a1[2] = a[2] + a[10];
2057		a1[3] = a[3] + a[11];
2058		a1[4] = a[4] + a[12];
2059		a1[5] = a[5] + a[13];
2060		a1[6] = a[6] + a[14];
2061		a1[7] = a[7] + a[15];
2062		fe448_sqr_8(r2, a + 8);
2063		fe448_sqr_8(r0, a);
2064		fe448_sqr_8(r1, a1);
2065		r[ 0] = r0[ 0] + r2[ 0] + r1[ 8] - r0[ 8];
2066		r[ 1] = r0[ 1] + r2[ 1] + r1[ 9] - r0[ 9];
2067		r[ 2] = r0[ 2] + r2[ 2] + r1[10] - r0[10];
2068		r[ 3] = r0[ 3] + r2[ 3] + r1[11] - r0[11];
2069		r[ 4] = r0[ 4] + r2[ 4] + r1[12] - r0[12];
2070		r[ 5] = r0[ 5] + r2[ 5] + r1[13] - r0[13];
2071		r[ 6] = r0[ 6] + r2[ 6] + r1[14] - r0[14];
2072		r[ 7] = r0[ 7] + r2[ 7] + r1[15] - r0[15];
2073		r[ 8] = r2[ 8] + r1[ 0] - r0[ 0] + r1[ 8];
2074		r[ 9] = r2[ 9] + r1[ 1] - r0[ 1] + r1[ 9];
2075		r[10] = r2[10] + r1[ 2] - r0[ 2] + r1[10];
2076		r[11] = r2[11] + r1[ 3] - r0[ 3] + r1[11];
2077		r[12] = r2[12] + r1[ 4] - r0[ 4] + r1[12];
2078		r[13] = r2[13] + r1[ 5] - r0[ 5] + r1[13];
2079		r[14] = r2[14] + r1[ 6] - r0[ 6] + r1[14];
2080		r[15] = r2[15] + r1[ 7] - r0[ 7] + r1[15];
2081		}
2082
2083		/* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
2084		* Constant time implementation - using Fermat's little theorem:
2085		* a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
2086		* For curve448: p - 2 = 2^448 - 2^224 - 3
2087		*
2088		* r [in] Field element to hold result.
2089		* a [in] Field element to invert.
2090		*/
2091		void fe448_invert(sword32* r, const sword32* a)
2092		{
2093		sword32 t1[16];
2094		sword32 t2[16];
2095		sword32 t3[16];
2096		sword32 t4[16];
2097		int i;
2098
2099		fe448_sqr(t1, a);
2100		/* t1 = 2 */
2101		fe448_mul(t1, t1, a);
2102		/* t1 = 3 */
2103		fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
2104		/* t2 = c */
2105		fe448_mul(t3, t2, a);
2106		/* t3 = d */
2107		fe448_mul(t1, t2, t1);
2108		/* t1 = f */
2109		fe448_sqr(t2, t1);
2110		/* t2 = 1e */
2111		fe448_mul(t4, t2, a);
2112		/* t4 = 1f */
2113		fe448_sqr(t2, t4); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
2114		/* t2 = 3e0 */
2115		fe448_mul(t1, t2, t4);
2116		/* t1 = 3ff */
2117		fe448_sqr(t2, t1); for (i = 1; i < 10; ++i) fe448_sqr(t2, t2);
2118		/* t2 = ffc00 */
2119		fe448_mul(t1, t2, t1);
2120		/* t1 = fffff */
2121		fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
2122		/* t2 = 1ffffe0 */
2123		fe448_mul(t1, t2, t4);
2124		/* t1 = 1ffffff */
2125		fe448_sqr(t2, t1); for (i = 1; i < 25; ++i) fe448_sqr(t2, t2);
2126		/* t2 = 3fffffe000000 */
2127		fe448_mul(t1, t2, t1);
2128		/* t1 = 3ffffffffffff */
2129		fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
2130		/* t2 = 7fffffffffffe0 */
2131		fe448_mul(t1, t2, t4);
2132		/* t1 = 7fffffffffffff */
2133		fe448_sqr(t2, t1); for (i = 1; i < 55; ++i) fe448_sqr(t2, t2);
2134		/* t2 = 3fffffffffffff80000000000000 */
2135		fe448_mul(t1, t2, t1);
2136		/* t1 = 3fffffffffffffffffffffffffff */
2137		fe448_sqr(t2, t1); for (i = 1; i < 110; ++i) fe448_sqr(t2, t2);
2138		/* t2 = fffffffffffffffffffffffffffc000000000000000000000000000 */
2139		fe448_mul(t1, t2, t1);
2140		/* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff */
2141		fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
2142		/* t2 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff0 */
2143		fe448_mul(t3, t3, t2);
2144		/* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
2145		fe448_mul(t1, t3, a);
2146		/* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
2147		fe448_sqr(t1, t1); for (i = 1; i < 224; ++i) fe448_sqr(t1, t1);
2148		/* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000000000000000000000000000000000 */
2149		fe448_mul(r, t3, t1);
2150		/* r = fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
2151		}
2152
2153		/* Scalar multiply the point by a number. r = n.a
2154		* Uses Montgomery ladder and only requires the x-ordinate.
2155		*
2156		* r [in] Field element to hold result.
2157		* n [in] Scalar as an array of bytes.
2158		* a [in] Point to multiply - x-ordinate only.
2159		*/
2160		int curve448(byte* r, const byte* n, const byte* a)
2161		{
2162		sword32 x1[16];
2163		sword32 x2[16];
2164		sword32 z2[16];
2165		sword32 x3[16];
2166		sword32 z3[16];
2167		sword32 t0[16];
2168		sword32 t1[16];
2169		int i;
2170		unsigned int swap;
2171		unsigned int b;
2172
2173		fe448_from_bytes(x1, a);
2174		fe448_1(x2);
2175		fe448_0(z2);
2176		fe448_copy(x3, x1);
2177		fe448_1(z3);
2178
2179		swap = 0;
2180		for (i = 447; i >= 0; --i) {
2181		b = (n[i >> 3] >> (i & 7)) & 1;
2182		swap ^= b;
2183		fe448_cswap(x2, x3, swap);
2184		fe448_cswap(z2, z3, swap);
2185		swap = b;
2186
2187		/* Montgomery Ladder - double and add */
2188		fe448_add(t0, x2, z2);
2189		fe448_reduce(t0);
2190		fe448_add(t1, x3, z3);
2191		fe448_reduce(t1);
2192		fe448_sub(x2, x2, z2);
2193		fe448_sub(x3, x3, z3);
2194		fe448_mul(t1, t1, x2);
2195		fe448_mul(z3, x3, t0);
2196		fe448_sqr(t0, t0);
2197		fe448_sqr(x2, x2);
2198		fe448_add(x3, z3, t1);
2199		fe448_reduce(x3);
2200		fe448_sqr(x3, x3);
2201		fe448_sub(z3, z3, t1);
2202		fe448_sqr(z3, z3);
2203		fe448_mul(z3, z3, x1);
2204		fe448_sub(t1, t0, x2);
2205		fe448_mul(x2, t0, x2);
2206		fe448_mul39081(z2, t1);
2207		fe448_add(z2, t0, z2);
2208		fe448_mul(z2, z2, t1);
2209		}
2210		/* Last two bits are 0 - no final swap check required. */
2211
2212		fe448_invert(z2, z2);
2213		fe448_mul(x2, x2, z2);
2214		fe448_to_bytes(r, x2);
2215
2216		return 0;
2217		}
2218
2219		#ifdef HAVE_ED448
2220		/* Check whether field element is not 0.
2221		* Must convert to a normalized form before checking.
2222		*
2223		* a [in] Field element.
2224		* returns 0 when zero, and any other value otherwise.
2225		*/
2226		int fe448_isnonzero(const sword32* a)
2227		{
2228		byte b[56];
2229		int i;
2230		byte c = 0;
2231		fe448_to_bytes(b, a);
2232		for (i = 0; i < 56; i++)
2233		c \|= b[i];
2234		return c;
2235		}
2236
2237		/* Check whether field element is negative.
2238		* Must convert to a normalized form before checking.
2239		*
2240		* a [in] Field element.
2241		* returns 1 when negative, and 0 otherwise.
2242		*/
2243		int fe448_isnegative(const sword32* a)
2244		{
2245		byte b[56];
2246		fe448_to_bytes(b, a);
2247		return b[0] & 1;
2248		}
2249
2250		/* Negates the field element. r = -a
2251		*
2252		* r [in] Field element to hold result.
2253		* a [in] Field element.
2254		*/
2255		void fe448_neg(sword32* r, const sword32* a)
2256		{
2257		r[0] = -a[0];
2258		r[1] = -a[1];
2259		r[2] = -a[2];
2260		r[3] = -a[3];
2261		r[4] = -a[4];
2262		r[5] = -a[5];
2263		r[6] = -a[6];
2264		r[7] = -a[7];
2265		r[8] = -a[8];
2266		r[9] = -a[9];
2267		r[10] = -a[10];
2268		r[11] = -a[11];
2269		r[12] = -a[12];
2270		r[13] = -a[13];
2271		r[14] = -a[14];
2272		r[15] = -a[15];
2273		}
2274
2275		/* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
2276		* Used for calcualting y-ordinate from x-ordinate for Ed448.
2277		*
2278		* r [in] Field element to hold result.
2279		* a [in] Field element to exponentiate.
2280		*/
2281		void fe448_pow_2_446_222_1(sword32* r, const sword32* a)
2282		{
2283		sword32 t1[16];
2284		sword32 t2[16];
2285		sword32 t3[16];
2286		sword32 t4[16];
2287		sword32 t5[16];
2288		int i;
2289
2290		fe448_sqr(t3, a);
2291		/* t3 = 2 */
2292		fe448_mul(t1, t3, a);
2293		/* t1 = 3 */
2294		fe448_sqr(t5, t1);
2295		/* t5 = 6 */
2296		fe448_mul(t5, t5, a);
2297		/* t5 = 7 */
2298		fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
2299		/* t2 = c */
2300		fe448_mul(t3, t2, t3);
2301		/* t3 = e */
2302		fe448_mul(t1, t2, t1);
2303		/* t1 = f */
2304		fe448_sqr(t2, t1); for (i = 1; i < 3; ++i) fe448_sqr(t2, t2);
2305		/* t2 = 78 */
2306		fe448_mul(t5, t2, t5);
2307		/* t5 = 7f */
2308		fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
2309		/* t2 = f0 */
2310		fe448_mul(t1, t2, t1);
2311		/* t1 = ff */
2312		fe448_mul(t3, t3, t2);
2313		/* t3 = fe */
2314		fe448_sqr(t2, t1); for (i = 1; i < 7; ++i) fe448_sqr(t2, t2);
2315		/* t2 = 7f80 */
2316		fe448_mul(t5, t2, t5);
2317		/* t5 = 7fff */
2318		fe448_sqr(t2, t1); for (i = 1; i < 8; ++i) fe448_sqr(t2, t2);
2319		/* t2 = ff00 */
2320		fe448_mul(t1, t2, t1);
2321		/* t1 = ffff */
2322		fe448_mul(t3, t3, t2);
2323		/* t3 = fffe */
2324		fe448_sqr(t2, t5); for (i = 1; i < 15; ++i) fe448_sqr(t2, t2);
2325		/* t2 = 3fff8000 */
2326		fe448_mul(t5, t2, t5);
2327		/* t5 = 3fffffff */
2328		fe448_sqr(t2, t1); for (i = 1; i < 16; ++i) fe448_sqr(t2, t2);
2329		/* t2 = ffff0000 */
2330		fe448_mul(t1, t2, t1);
2331		/* t1 = ffffffff */
2332		fe448_mul(t3, t3, t2);
2333		/* t3 = fffffffe */
2334		fe448_sqr(t2, t1); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
2335		/* t2 = ffffffff00000000 */
2336		fe448_mul(t2, t2, t1);
2337		/* t2 = ffffffffffffffff */
2338		fe448_sqr(t1, t2); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
2339		/* t1 = ffffffffffffffff0000000000000000 */
2340		fe448_mul(t1, t1, t2);
2341		/* t1 = ffffffffffffffffffffffffffffffff */
2342		fe448_sqr(t1, t1); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
2343		/* t1 = ffffffffffffffffffffffffffffffff0000000000000000 */
2344		fe448_mul(t4, t1, t2);
2345		/* t4 = ffffffffffffffffffffffffffffffffffffffffffffffff */
2346		fe448_sqr(t2, t4); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
2347		/* t2 = ffffffffffffffffffffffffffffffffffffffffffffffff00000000 */
2348		fe448_mul(t3, t3, t2);
2349		/* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
2350		fe448_sqr(t1, t3); for (i = 1; i < 192; ++i) fe448_sqr(t1, t1);
2351		/* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe000000000000000000000000000000000000000000000000 */
2352		fe448_mul(t1, t1, t4);
2353		/* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffff */
2354		fe448_sqr(t1, t1); for (i = 1; i < 30; ++i) fe448_sqr(t1, t1);
2355		/* t1 = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffc0000000 */
2356		fe448_mul(r, t5, t1);
2357		/* r = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff */
2358		}
2359
2360		/* Constant time, conditional move of b into a.
2361		* a is not changed if the condition is 0.
2362		*
2363		* a A field element.
2364		* b A field element.
2365		* c If 1 then copy and if 0 then don't copy.
2366		*/
2367		void fe448_cmov(sword32* a, const sword32* b, int c)
2368		{
2369		sword32 m = -(sword32)c;
2370		sword32 t0 = m & (a[0] ^ b[0]);
2371		sword32 t1 = m & (a[1] ^ b[1]);
2372		sword32 t2 = m & (a[2] ^ b[2]);
2373		sword32 t3 = m & (a[3] ^ b[3]);
2374		sword32 t4 = m & (a[4] ^ b[4]);
2375		sword32 t5 = m & (a[5] ^ b[5]);
2376		sword32 t6 = m & (a[6] ^ b[6]);
2377		sword32 t7 = m & (a[7] ^ b[7]);
2378		sword32 t8 = m & (a[8] ^ b[8]);
2379		sword32 t9 = m & (a[9] ^ b[9]);
2380		sword32 t10 = m & (a[10] ^ b[10]);
2381		sword32 t11 = m & (a[11] ^ b[11]);
2382		sword32 t12 = m & (a[12] ^ b[12]);
2383		sword32 t13 = m & (a[13] ^ b[13]);
2384		sword32 t14 = m & (a[14] ^ b[14]);
2385		sword32 t15 = m & (a[15] ^ b[15]);
2386
2387		a[0] ^= t0;
2388		a[1] ^= t1;
2389		a[2] ^= t2;
2390		a[3] ^= t3;
2391		a[4] ^= t4;
2392		a[5] ^= t5;
2393		a[6] ^= t6;
2394		a[7] ^= t7;
2395		a[8] ^= t8;
2396		a[9] ^= t9;
2397		a[10] ^= t10;
2398		a[11] ^= t11;
2399		a[12] ^= t12;
2400		a[13] ^= t13;
2401		a[14] ^= t14;
2402		a[15] ^= t15;
2403		}
2404
2405		#endif /* HAVE_ED448 */
2406		#endif
2407
2408		#endif /* HAVE_CURVE448 \|\| HAVE_ED448 */

Coverage Report

Created: 2022-08-24 06:28