/src/wolfssl-heapmath/wolfcrypt/src/fe_x25519_128.h

Line	Count	Source
1		/* fe_x25519_128.h
2		*
3		* Copyright (C) 2006-2026 wolfSSL Inc.
4		*
5		* This file is part of wolfSSL.
6		*
7		* wolfSSL is free software; you can redistribute it and/or modify
8		* it under the terms of the GNU General Public License as published by
9		* the Free Software Foundation; either version 3 of the License, or
10		* (at your option) any later version.
11		*
12		* wolfSSL is distributed in the hope that it will be useful,
13		* but WITHOUT ANY WARRANTY; without even the implied warranty of
14		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15		* GNU General Public License for more details.
16		*
17		* You should have received a copy of the GNU General Public License
18		* along with this program; if not, write to the Free Software
19		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20		*/
21
22		/* Generated using (from wolfssl):
23		* cd ../scripts
24		* ruby ./x25519/fe_x25519_128_gen.rb > ../wolfssl/wolfcrypt/src/fe_x25519_128.h
25		*/
26
27		void fe_init(void)
28	10.4k	{
		/* No-op: this implementation performs no initialization. */
29	10.4k	}
30
31		/* Convert a number represented as an array of 32 little-endian bytes to an
32		* array of five words with 51 bits of data in each word.
33		* The top bit of in[31] (bit 255 of the number) is masked off and ignored.
34		* in An array of bytes; in[0] to in[31] are read.
35		* out An array of words; out[0] to out[4] are written.
36		*/
37		void fe_frombytes(fe out, const unsigned char *in)
38	9.21k	{
39	9.21k	out[0] = (sword64)(
40	9.21k	(((word64)((in[ 0] ) )) )
41	9.21k	\| (((word64)((in[ 1] ) )) << 8)
42	9.21k	\| (((word64)((in[ 2] ) )) << 16)
43	9.21k	\| (((word64)((in[ 3] ) )) << 24)
44	9.21k	\| (((word64)((in[ 4] ) )) << 32)
45	9.21k	\| (((word64)((in[ 5] ) )) << 40)
46	9.21k	\| (((word64)((in[ 6] ) & 0x07)) << 48));
47	9.21k	out[1] = (sword64)(
48	9.21k	(((word64)((in[ 6] >> 3) & 0x1f)) )
49	9.21k	\| (((word64)((in[ 7] ) )) << 5)
50	9.21k	\| (((word64)((in[ 8] ) )) << 13)
51	9.21k	\| (((word64)((in[ 9] ) )) << 21)
52	9.21k	\| (((word64)((in[10] ) )) << 29)
53	9.21k	\| (((word64)((in[11] ) )) << 37)
54	9.21k	\| (((word64)((in[12] ) & 0x3f)) << 45));
55	9.21k	out[2] = (sword64)(
56	9.21k	(((word64)((in[12] >> 6) & 0x03)) )
57	9.21k	\| (((word64)((in[13] ) )) << 2)
58	9.21k	\| (((word64)((in[14] ) )) << 10)
59	9.21k	\| (((word64)((in[15] ) )) << 18)
60	9.21k	\| (((word64)((in[16] ) )) << 26)
61	9.21k	\| (((word64)((in[17] ) )) << 34)
62	9.21k	\| (((word64)((in[18] ) )) << 42)
63	9.21k	\| (((word64)((in[19] ) & 0x01)) << 50));
64	9.21k	out[3] = (sword64)(
65	9.21k	(((word64)((in[19] >> 1) & 0x7f)) )
66	9.21k	\| (((word64)((in[20] ) )) << 7)
67	9.21k	\| (((word64)((in[21] ) )) << 15)
68	9.21k	\| (((word64)((in[22] ) )) << 23)
69	9.21k	\| (((word64)((in[23] ) )) << 31)
70	9.21k	\| (((word64)((in[24] ) )) << 39)
71	9.21k	\| (((word64)((in[25] ) & 0x0f)) << 47));
72	9.21k	out[4] = (sword64)(
73	9.21k	(((word64)((in[25] >> 4) & 0x0f)) )
74	9.21k	\| (((word64)((in[26] ) )) << 4)
75	9.21k	\| (((word64)((in[27] ) )) << 12)
76	9.21k	\| (((word64)((in[28] ) )) << 20)
77	9.21k	\| (((word64)((in[29] ) )) << 28)
78	9.21k	\| (((word64)((in[30] ) )) << 36)
79	9.21k	\| (((word64)((in[31] ) & 0x7f)) << 44));
80	9.21k	}
81
82		/* Convert a number represented as an array of words to an array of bytes.
83		* The array of words is normalized to an array of 51-bit data words and, if
84		* greater than or equal to the modulus, reduced by the prime 2^255 - 19.
85		*
86		* n An array of words; it is copied and not modified.
87		* out An array of bytes; out[0] to out[31] are written, least significant first.
88		*/
89		void fe_tobytes(unsigned char *out, const fe n)
90	8.72k	{
91	8.72k	fe in;
92	8.72k	sword64 c;
93
94	8.72k	in[0] = n[0];
95	8.72k	in[1] = n[1];
96	8.72k	in[2] = n[2];
97	8.72k	in[3] = n[3];
98	8.72k	in[4] = n[4];
99
100		/* Normalize to 51-bits of data per word; overflow of word 4 is folded into word 0 times 19. */
101	8.72k	in[0] += (in[4] >> 51) * 19; in[4] &= 0x7ffffffffffff;
102
103	8.72k	in[1] += in[0] >> 51; in[0] &= 0x7ffffffffffff;
104	8.72k	in[2] += in[1] >> 51; in[1] &= 0x7ffffffffffff;
105	8.72k	in[3] += in[2] >> 51; in[2] &= 0x7ffffffffffff;
106	8.72k	in[4] += in[3] >> 51; in[3] &= 0x7ffffffffffff;
107	8.72k	in[0] += (in[4] >> 51) * 19;
108	8.72k	in[4] &= 0x7ffffffffffff;
109
		/* c is the carry out of word 4 when 19 is added to the value, that is
		* whether value + 19 reaches 2^255 (value >= 2^255 - 19). */
110	8.72k	c = (in[0] + 19) >> 51;
111	8.72k	c = (in[1] + c) >> 51;
112	8.72k	c = (in[2] + c) >> 51;
113	8.72k	c = (in[3] + c) >> 51;
114	8.72k	c = (in[4] + c) >> 51;
		/* Add 19 * c and let the final mask of word 4 drop bit 255: together
		* this subtracts 2^255 - 19 exactly when c is 1, without a branch. */
115	8.72k	in[0] += c * 19;
116	8.72k	in[1] += in[0] >> 51; in[0] &= 0x7ffffffffffff;
117	8.72k	in[2] += in[1] >> 51; in[1] &= 0x7ffffffffffff;
118	8.72k	in[3] += in[2] >> 51; in[2] &= 0x7ffffffffffff;
119	8.72k	in[4] += in[3] >> 51; in[3] &= 0x7ffffffffffff;
120	8.72k	in[4] &= 0x7ffffffffffff;
121
		/* Pack the five 51-bit words into 32 bytes, least significant byte first. */
122	8.72k	out[ 0] = (((byte)((in[0] ) )) );
123	8.72k	out[ 1] = (((byte)((in[0] >> 8) )) );
124	8.72k	out[ 2] = (((byte)((in[0] >> 16) )) );
125	8.72k	out[ 3] = (((byte)((in[0] >> 24) )) );
126	8.72k	out[ 4] = (((byte)((in[0] >> 32) )) );
127	8.72k	out[ 5] = (((byte)((in[0] >> 40) )) );
128	8.72k	out[ 6] = (byte)((((byte)((in[0] >> 48) & 0x07)) )
129	8.72k	\| (((byte)((in[1] ) & 0x1f)) << 3));
130	8.72k	out[ 7] = (((byte)((in[1] >> 5) )) );
131	8.72k	out[ 8] = (((byte)((in[1] >> 13) )) );
132	8.72k	out[ 9] = (((byte)((in[1] >> 21) )) );
133	8.72k	out[10] = (((byte)((in[1] >> 29) )) );
134	8.72k	out[11] = (((byte)((in[1] >> 37) )) );
135	8.72k	out[12] = (byte)((((byte)((in[1] >> 45) & 0x3f)) )
136	8.72k	\| (((byte)((in[2] ) & 0x03)) << 6));
137	8.72k	out[13] = (((byte)((in[2] >> 2) )) );
138	8.72k	out[14] = (((byte)((in[2] >> 10) )) );
139	8.72k	out[15] = (((byte)((in[2] >> 18) )) );
140	8.72k	out[16] = (((byte)((in[2] >> 26) )) );
141	8.72k	out[17] = (((byte)((in[2] >> 34) )) );
142	8.72k	out[18] = (((byte)((in[2] >> 42) )) );
143	8.72k	out[19] = (byte)((((byte)((in[2] >> 50) & 0x01)) )
144	8.72k	\| (((byte)((in[3] ) & 0x7f)) << 1));
145	8.72k	out[20] = (((byte)((in[3] >> 7) )) );
146	8.72k	out[21] = (((byte)((in[3] >> 15) )) );
147	8.72k	out[22] = (((byte)((in[3] >> 23) )) );
148	8.72k	out[23] = (((byte)((in[3] >> 31) )) );
149	8.72k	out[24] = (((byte)((in[3] >> 39) )) );
150	8.72k	out[25] = (byte)((((byte)((in[3] >> 47) & 0x0f)) )
151	8.72k	\| (((byte)((in[4] ) & 0x0f)) << 4));
152	8.72k	out[26] = (((byte)((in[4] >> 4) )) );
153	8.72k	out[27] = (((byte)((in[4] >> 12) )) );
154	8.72k	out[28] = (((byte)((in[4] >> 20) )) );
155	8.72k	out[29] = (((byte)((in[4] >> 28) )) );
156	8.72k	out[30] = (((byte)((in[4] >> 36) )) );
157	8.72k	out[31] = (((byte)((in[4] >> 44) & 0x7f)) );
158	8.72k	}
159
160		/* Set the field element to 1.
161		*
162		* n The field element to set; word 0 becomes 1 and words 1 to 4 become 0.
163		*/
164		void fe_1(fe n)
165	122k	{
166	122k	n[0] = 0x0000000000001;
167	122k	n[1] = 0x0000000000000;
168	122k	n[2] = 0x0000000000000;
169	122k	n[3] = 0x0000000000000;
170	122k	n[4] = 0x0000000000000;
171	122k	}
172
173		/* Set the field element to 0.
174		*
175		* n The field element to set; all five words become 0.
176		*/
177		void fe_0(fe n)
178	63.6k	{
179	63.6k	n[0] = 0x0000000000000;
180	63.6k	n[1] = 0x0000000000000;
181	63.6k	n[2] = 0x0000000000000;
182	63.6k	n[3] = 0x0000000000000;
183	63.6k	n[4] = 0x0000000000000;
184	63.6k	}
185
186		/* Copy field element a into field element r, word by word.
187		*
188		* r Field element to copy into; all five words are overwritten.
189		* a Field element to copy; it is not modified.
190		*/
191		void fe_copy(fe r, const fe a)
192	10.2k	{
193	10.2k	r[0] = a[0];
194	10.2k	r[1] = a[1];
195	10.2k	r[2] = a[2];
196	10.2k	r[3] = a[3];
197	10.2k	r[4] = a[4];
198	10.2k	}
199
200		/* Constant time, conditional swap of field elements f and g.
201		*
202		* f A field element; receives g's words when b is 1.
203		* g A field element; receives f's words when b is 1.
204		* b If 1 then swap and if 0 then don't swap.
205		*/
206		void fe_cswap(fe f, fe g, int b)
207	4.31M	{
208	4.31M	sword64 m = b;
209	4.31M	sword64 t0, t1, t2, t3, t4;
210
211		/* Convert conditional into mask: 0 stays 0 and 1 becomes -1. */
212	4.31M	m = -m;
		/* Each t is f ^ g when swapping and 0 otherwise. */
213	4.31M	t0 = m & (f[0] ^ g[0]);
214	4.31M	t1 = m & (f[1] ^ g[1]);
215	4.31M	t2 = m & (f[2] ^ g[2]);
216	4.31M	t3 = m & (f[3] ^ g[3]);
217	4.31M	t4 = m & (f[4] ^ g[4]);
218
		/* XORing the same t into both words exchanges them, or leaves both as is. */
219	4.31M	f[0] ^= t0;
220	4.31M	f[1] ^= t1;
221	4.31M	f[2] ^= t2;
222	4.31M	f[3] ^= t3;
223	4.31M	f[4] ^= t4;
224
225	4.31M	g[0] ^= t0;
226	4.31M	g[1] ^= t1;
227	4.31M	g[2] ^= t2;
228	4.31M	g[3] ^= t3;
229	4.31M	g[4] ^= t4;
230	4.31M	}
231
232		/* Subtract b from a into r. (r = a - b)
233		* Word-wise subtraction only: no carry propagation or reduction is done here.
234		* r A field element that receives the five word differences.
235		* a A field element.
236		* b A field element.
237		*/
238		void fe_sub(fe r, const fe a, const fe b)
239	4.73M	{
240	4.73M	r[0] = a[0] - b[0];
241	4.73M	r[1] = a[1] - b[1];
242	4.73M	r[2] = a[2] - b[2];
243	4.73M	r[3] = a[3] - b[3];
244	4.73M	r[4] = a[4] - b[4];
245	4.73M	}
246
247		/* Add b to a into r. (r = a + b)
248		* Word-wise addition only: no carry propagation or reduction is done here.
249		* r A field element that receives the five word sums.
250		* a A field element.
251		* b A field element.
252		*/
253		void fe_add(fe r, const fe a, const fe b)
254	4.74M	{
255	4.74M	r[0] = a[0] + b[0];
256	4.74M	r[1] = a[1] + b[1];
257	4.74M	r[2] = a[2] + b[2];
258	4.74M	r[3] = a[3] + b[3];
259	4.74M	r[4] = a[4] + b[4];
260	4.74M	}
261
262		/* Multiply a and b into r. (r = a * b)
263		* Partial products are summed in 128-bit words, folded with 19 and carried.
264		* r A field element that receives the product; r[1] to r[4] hold 51 bits each.
265		* a A field element.
266		* b A field element.
267		*/
268		void fe_mul(fe r, const fe a, const fe b)
269	6.22M	{
270	6.22M	const __int128_t k19 = 19;
271	6.22M	__int128_t t0 = ((__int128_t)a[0]) * b[0];
272	6.22M	__int128_t t1 = ((__int128_t)a[0]) * b[1]
273	6.22M	+ ((__int128_t)a[1]) * b[0];
274	6.22M	__int128_t t2 = ((__int128_t)a[0]) * b[2]
275	6.22M	+ ((__int128_t)a[1]) * b[1]
276	6.22M	+ ((__int128_t)a[2]) * b[0];
277	6.22M	__int128_t t3 = ((__int128_t)a[0]) * b[3]
278	6.22M	+ ((__int128_t)a[1]) * b[2]
279	6.22M	+ ((__int128_t)a[2]) * b[1]
280	6.22M	+ ((__int128_t)a[3]) * b[0];
281	6.22M	__int128_t t4 = ((__int128_t)a[0]) * b[4]
282	6.22M	+ ((__int128_t)a[1]) * b[3]
283	6.22M	+ ((__int128_t)a[2]) * b[2]
284	6.22M	+ ((__int128_t)a[3]) * b[1]
285	6.22M	+ ((__int128_t)a[4]) * b[0];
286	6.22M	__int128_t t5 = ((__int128_t)a[1]) * b[4]
287	6.22M	+ ((__int128_t)a[2]) * b[3]
288	6.22M	+ ((__int128_t)a[3]) * b[2]
289	6.22M	+ ((__int128_t)a[4]) * b[1];
290	6.22M	__int128_t t6 = ((__int128_t)a[2]) * b[4]
291	6.22M	+ ((__int128_t)a[3]) * b[3]
292	6.22M	+ ((__int128_t)a[4]) * b[2];
293	6.22M	__int128_t t7 = ((__int128_t)a[3]) * b[4]
294	6.22M	+ ((__int128_t)a[4]) * b[3];
295	6.22M	__int128_t t8 = ((__int128_t)a[4]) * b[4];
296
297		/* Modulo reduce double long word: columns 5 to 8 fold into columns 0 to 3 times 19. */
298	6.22M	t0 += t5 * k19;
299	6.22M	t1 += t6 * k19;
300	6.22M	t2 += t7 * k19;
301	6.22M	t3 += t8 * k19;
302
303		/* Normalize to 51-bits of data per word. */
304	6.22M	t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
305
306	6.22M	t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
307	6.22M	t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
308	6.22M	t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
309	6.22M	t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
		/* The last carry is folded into r[0], which is not masked again afterwards. */
310	6.22M	r[0] += (sword64)((t4 >> 51) * k19);
311	6.22M	r[4] = t4 & 0x7ffffffffffff;
312	6.22M	}
313
314		/* Square a and put result in r. (r = a * a)
315		*
316		* r A field element that receives the square; r[1] to r[4] hold 51 bits each.
317		* a A field element.
318		* Cross products a[i] * a[j] with i != j are computed once and doubled.
319		*/
320		void fe_sq(fe r, const fe a)
321	6.07M	{
322	6.07M	const __int128_t k19 = 19;
323	6.07M	const __int128_t k2 = 2;
324	6.07M	__int128_t t0 = ((__int128_t)a[0]) * a[0];
325	6.07M	__int128_t t1 = ((__int128_t)a[0]) * a[1] * k2;
326	6.07M	__int128_t t2 = ((__int128_t)a[0]) * a[2] * k2
327	6.07M	+ ((__int128_t)a[1]) * a[1];
328	6.07M	__int128_t t3 = ((__int128_t)a[0]) * a[3] * k2
329	6.07M	+ ((__int128_t)a[1]) * a[2] * k2;
330	6.07M	__int128_t t4 = ((__int128_t)a[0]) * a[4] * k2
331	6.07M	+ ((__int128_t)a[1]) * a[3] * k2
332	6.07M	+ ((__int128_t)a[2]) * a[2];
333	6.07M	__int128_t t5 = ((__int128_t)a[1]) * a[4] * k2
334	6.07M	+ ((__int128_t)a[2]) * a[3] * k2;
335	6.07M	__int128_t t6 = ((__int128_t)a[2]) * a[4] * k2
336	6.07M	+ ((__int128_t)a[3]) * a[3];
337	6.07M	__int128_t t7 = ((__int128_t)a[3]) * a[4] * k2;
338	6.07M	__int128_t t8 = ((__int128_t)a[4]) * a[4];
339
340		/* Modulo reduce double long word: columns 5 to 8 fold into columns 0 to 3 times 19. */
341	6.07M	t0 += t5 * k19;
342	6.07M	t1 += t6 * k19;
343	6.07M	t2 += t7 * k19;
344	6.07M	t3 += t8 * k19;
345
346		/* Normalize to 51-bits of data per word. */
347	6.07M	t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
348
349	6.07M	t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
350	6.07M	t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
351	6.07M	t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
352	6.07M	t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
		/* The last carry is folded into r[0], which is not masked again afterwards. */
353	6.07M	r[0] += (sword64)((t4 >> 51) * k19);
354	6.07M	r[4] = t4 & 0x7ffffffffffff;
355	6.07M	}
356
357		/* Multiply a by 121666 and put result in r. (r = 121666 * a)
358		*
359		* r A field element that receives the product; r[1] to r[4] hold 51 bits each.
360		* a A field element; declared non-const but only read here.
361		* The carry out of word 4 is multiplied by 19 and added back into word 0.
362		*/
363		void fe_mul121666(fe r, fe a)
364	1.06M	{
365	1.06M	const __int128_t k19 = 19;
366	1.06M	const __int128_t k121666 = 121666;
367	1.06M	__int128_t t0 = ((__int128_t)a[0]) * k121666;
368	1.06M	__int128_t t1 = ((__int128_t)a[1]) * k121666;
369	1.06M	__int128_t t2 = ((__int128_t)a[2]) * k121666;
370	1.06M	__int128_t t3 = ((__int128_t)a[3]) * k121666;
371	1.06M	__int128_t t4 = ((__int128_t)a[4]) * k121666;
372
373		/* Normalize to 51-bits of data per word. */
374	1.06M	t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
375
376	1.06M	t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
377	1.06M	t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
378	1.06M	t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
379	1.06M	t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
		/* The last carry is folded into r[0], which is not masked again afterwards. */
380	1.06M	r[0] += (sword64)((t4 >> 51) * k19);
381	1.06M	r[4] = t4 & 0x7ffffffffffff;
382	1.06M	}
383
384		/* Find the inverse of a modulo 2^255 - 19 and put result in r.
385		* (r * a) mod (2^255 - 19) = 1
386		* Implementation is constant time: a fixed sequence of squarings and multiplications.
387		*
388		* r A field element that receives the inverse.
389		* a A field element.
390		*/
391		void fe_invert(fe r, const fe a)
392	5.39k	{
393	5.39k	fe t0, t1, t2, t3;
394	5.39k	int i;
395
396		/* a ^ (2^255 - 21), where 2^255 - 21 = (2^255 - 19) - 2. */
		/* t0 = a^2, t1 = a^9, t0 = a^11, t1 = a^31 = a^(2^5 - 1). */
397	5.39k	fe_sq(t0, a); for (i = 1; i < 1; ++i) fe_sq(t0, t0);
398	10.7k	fe_sq(t1, t0); for (i = 1; i < 2; ++i) fe_sq(t1, t1); fe_mul(t1, a, t1);
399	5.39k	fe_mul(t0, t0, t1);
400	5.39k	fe_sq(t2, t0); for (i = 1; i < 1; ++i) fe_sq(t2, t2); fe_mul(t1, t1, t2);
		/* Exponents 2^k - 1 for k = 10, 20, 40, 50, 100, 200, 250 in turn. */
401	26.9k	fe_sq(t2, t1); for (i = 1; i < 5; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
402	53.9k	fe_sq(t2, t1); for (i = 1; i < 10; ++i) fe_sq(t2, t2); fe_mul(t2, t2, t1);
403	107k	fe_sq(t3, t2); for (i = 1; i < 20; ++i) fe_sq(t3, t3); fe_mul(t2, t3, t2);
404	53.9k	fe_sq(t2, t2); for (i = 1; i < 10; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
405	269k	fe_sq(t2, t1); for (i = 1; i < 50; ++i) fe_sq(t2, t2); fe_mul(t2, t2, t1);
406	539k	fe_sq(t3, t2); for (i = 1; i < 100; ++i) fe_sq(t3, t3); fe_mul(t2, t3, t2);
407	269k	fe_sq(t2, t2); for (i = 1; i < 50; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
		/* Five squarings give 2^255 - 32; multiplying by a^11 gives 2^255 - 21. */
408	26.9k	fe_sq(t1, t1); for (i = 1; i < 5; ++i) fe_sq(t1, t1); fe_mul( r, t1, t0);
409	5.39k	}
410
411		#ifndef CURVE25519_SMALL
412		#ifndef WOLFSSL_CURVE25519_BLINDING
413		/* Scalar multiply the field element a by n using Montgomery Ladder and places
414		* result in r. Bits 254 down to 0 of n are processed; bit 255 is not read.
415		* n is used as given: no bits are set or cleared here. Always returns 0.
416		* r A field element as an array of bytes.
417		* n The scalar as an array of bytes.
418		* a A field element as an array of bytes.
419		*/
420		int curve25519(byte* r, const byte* n, const byte* a)
421		{
422		fe x1, x2, z2, x3, z3;
423		fe t0, t1;
424		int pos;
425		unsigned int swap;
426		unsigned int b;
427
428		fe_frombytes(x1, a);
429		fe_1(x2);
430		fe_0(z2);
431		fe_copy(x3, x1);
432		fe_1(z3);
433
434		swap = 0;
435		for (pos = 254;pos >= 0;--pos) {
436		b = (unsigned int)(n[pos / 8] >> (pos & 7));
437		b &= 1;
		/* Swap only when this bit differs from the previous one. */
438		swap ^= b;
439		fe_cswap(x2, x3, (int)swap);
440		fe_cswap(z2, z3, (int)swap);
441		swap = b;
442
443		fe_sub(t0, x3, z3);
444		fe_sub(t1, x2, z2);
445		fe_add(x2, x2, z2);
446		fe_add(z2, x3, z3);
447		fe_mul(z3, t0, x2);
448		fe_mul(z2, z2, t1);
449		fe_sq(t0, t1);
450		fe_sq(t1, x2);
451		fe_add(x3, z3, z2);
452		fe_sub(z2, z3, z2);
453		fe_mul(x2, t1, t0);
454		fe_sub(t1, t1, t0);
455		fe_sq(z2, z2);
456		fe_mul121666(z3, t1);
457		fe_sq(x3, x3);
458		fe_add(t0, t0, z3);
459		fe_mul(z3, x1, z2);
460		fe_mul(z2, t1, t0);
461		}
		/* Apply the swap still pending from the last bit processed. */
462		fe_cswap(x2, x3, (int)swap);
463		fe_cswap(z2, z3, (int)swap);
464
		/* Result is x2 / z2, written out as 32 bytes. */
465		fe_invert(z2, z2);
466		fe_mul(x2, x2, z2);
467		fe_tobytes(r, x2);
468
469		return 0;
470		}
471		#else
472		int curve25519_blind(byte* r, const byte* n, const byte* mask, const byte* a,
473		const byte* rz)
474	4.16k	{
		/* Montgomery ladder in which each step is preceded by a conditional swap
		* driven by a bit of n and followed by one driven by a bit of mask. A mask
		* swap (bit 255) comes before the loop and an n swap (bit 0) after it.
		* NOTE(review): presumably n is the scalar already XORed with mask, so the
		* paired swaps combine into the real ladder swap - confirm against caller.
		*
		* r The result as an array of bytes.
		* n Bits 255 down to 0 select the swap before each step and the final swap.
		* mask Bits 255 down to 0 select the initial swap and the swap after each step.
		* a A field element as an array of bytes; loaded into x1.
		* rz An array of bytes loaded into z3 and multiplied into x3.
		* NOTE(review): looks like a random projective blinding value - confirm.
		* Always returns 0.
		*/
475	4.16k	fe x1, x2, z2, x3, z3;
476	4.16k	fe t0, t1;
477	4.16k	int pos;
478	4.16k	unsigned int b;
479
480	4.16k	fe_frombytes(x1, a);
481	4.16k	fe_1(x2);
482	4.16k	fe_0(z2);
483	4.16k	fe_copy(x3, x1);
484	4.16k	fe_frombytes(z3, rz);
485	4.16k	fe_mul(x3, x3, z3);
486
487		/* Bit 255 of mask, that is the top bit of mask[31]. */
488	4.16k	b = (unsigned int)(mask[31] >> 7);
489	4.16k	b &= 1;
490	4.16k	fe_cswap(x2,x3,(int)b);
491	4.16k	fe_cswap(z2,z3,(int)b);
492	1.06M	for (pos = 255;pos >= 1;--pos) {
493	1.06M	b = (unsigned int)(n[pos / 8] >> (pos & 7));
494	1.06M	b &= 1;
495	1.06M	fe_cswap(x2, x3, (int)b);
496	1.06M	fe_cswap(z2, z3, (int)b);
497
498		/* Montgomery ladder step. */
499	1.06M	fe_sub(t0, x3, z3);
500	1.06M	fe_sub(t1, x2, z2);
501	1.06M	fe_add(x2, x2, z2);
502	1.06M	fe_add(z2, x3, z3);
503	1.06M	fe_mul(z3, t0, x2);
504	1.06M	fe_mul(z2, z2, t1);
505	1.06M	fe_sq(t0, t1);
506	1.06M	fe_sq(t1, x2);
507	1.06M	fe_add(x3, z3, z2);
508	1.06M	fe_sub(z2, z3, z2);
509	1.06M	fe_mul(x2, t1, t0);
510	1.06M	fe_sub(t1, t1, t0);
511	1.06M	fe_sq(z2, z2);
512	1.06M	fe_mul121666(z3, t1);
513	1.06M	fe_sq(x3, x3);
514	1.06M	fe_add(t0, t0, z3);
515	1.06M	fe_mul(z3, x1, z2);
516	1.06M	fe_mul(z2, t1, t0);
517
		/* Swap on mask bit pos - 1, one position below the n bit used above. */
518	1.06M	b = (unsigned int)(mask[(pos - 1) / 8] >> ((pos - 1) & 7));
519	1.06M	b &= 1;
520	1.06M	fe_cswap(x2, x3, (int)b);
521	1.06M	fe_cswap(z2, z3, (int)b);
522	1.06M	}
		/* Final swap on bit 0 of n; no ladder step follows it. */
523	4.16k	b = (unsigned int)(n[0] & 1);
524	4.16k	fe_cswap(x2, x3, (int)b);
525	4.16k	fe_cswap(z2, z3, (int)b);
526
		/* Result is x2 / z2, written out as 32 bytes. */
527	4.16k	fe_invert(z2, z2);
528	4.16k	fe_mul(x2, x2, z2);
529	4.16k	fe_tobytes(r, x2);
530
531	4.16k	return 0;
532	4.16k	}
533		#endif /* WOLFSSL_CURVE25519_BLINDING */
534		#endif /* !CURVE25519_SMALL */
535
536		/* The field element value 0 as an array of 32 bytes; fe_isnonzero compares against it. */
537		static const unsigned char zero[32] = {0};
538
539		/* Constant time check as to whether a is not 0. Returns the result of ConstantCompare
540		* on the 32-byte encoding of a and 32 zero bytes. NOTE(review): presumably 0 when a is 0.
541		* a A field element.
542		*/
543		int fe_isnonzero(const fe a)
544	1.27k	{
545	1.27k	unsigned char s[32];
		/* fe_tobytes reduces first, so every representation of 0 encodes as all zero bytes. */
546	1.27k	fe_tobytes(s, a);
547	1.27k	return ConstantCompare(s, zero, 32);
548	1.27k	}
549
550		/* Checks whether a is negative: returns the least significant bit (0 or 1)
551		* of the 32-byte encoding that fe_tobytes produces for a.
552		* a A field element.
553		*/
554		int fe_isnegative(const fe a)
555	2.05k	{
556	2.05k	unsigned char s[32];
557	2.05k	fe_tobytes(s, a);
558	2.05k	return s[0] & 1;
559	2.05k	}
560
561		/* Negates field element a and stores the result in r.
562		* Each word is negated on its own; no carry propagation or reduction is done here.
563		* r A field element that receives the five negated words.
564		* a A field element.
565		*/
566		void fe_neg(fe r, const fe a)
567	58.6k	{
568	58.6k	r[0] = -a[0];
569	58.6k	r[1] = -a[1];
570	58.6k	r[2] = -a[2];
571	58.6k	r[3] = -a[3];
572	58.6k	r[4] = -a[4];
573	58.6k	}
574
575		/* Constant time, conditional move of g into f.
576		* f is not changed if the condition is 0.
577		*
578		* f A field element; overwritten with g when b is 1.
579		* g A field element; it is not modified.
580		* b If 1 then copy and if 0 then don't copy.
581		*/
582		void fe_cmov(fe f, const fe g, int b)
583	1.45M	{
584	1.45M	sword64 m = b;
585	1.45M	sword64 t0, t1, t2, t3, t4;
586
587		/* Convert conditional into mask: 0 stays 0 and 1 becomes -1. */
588	1.45M	m = -m;
		/* Each t is f ^ g when copying and 0 otherwise. */
589	1.45M	t0 = m & (f[0] ^ g[0]);
590	1.45M	t1 = m & (f[1] ^ g[1]);
591	1.45M	t2 = m & (f[2] ^ g[2]);
592	1.45M	t3 = m & (f[3] ^ g[3]);
593	1.45M	t4 = m & (f[4] ^ g[4]);
594
		/* f ^ (f ^ g) is g; f ^ 0 leaves f as it was. */
595	1.45M	f[0] ^= t0;
596	1.45M	f[1] ^= t1;
597	1.45M	f[2] ^= t2;
598	1.45M	f[3] ^= t3;
599	1.45M	f[4] ^= t4;
600	1.45M	}
601
602		void fe_pow22523(fe r, const fe a)
603	867	{
		/* Raise a to the power 2^252 - 3 and put the result in r, using a fixed
		* sequence of squarings and multiplications.
		*
		* r A field element that receives the power.
		* a A field element.
		*/
604	867	fe t0, t1, t2;
605	867	int i;
606
607		/* a ^ (2^252 - 3) */
		/* t0 = a^2, t1 = a^9, t0 = a^11, t0 = a^31 = a^(2^5 - 1). */
608	867	fe_sq(t0, a); for (i = 1; i < 1; ++i) fe_sq(t0, t0);
609	1.73k	fe_sq(t1, t0); for (i = 1; i < 2; ++i) fe_sq(t1, t1); fe_mul(t1, a, t1);
610	867	fe_mul(t0, t0, t1);
611	867	fe_sq(t0, t0); for (i = 1; i < 1; ++i) fe_sq(t0, t0); fe_mul(t0, t1, t0);
		/* Exponents 2^k - 1 for k = 10, 20, 40, 50, 100, 200, 250 in turn. */
612	4.33k	fe_sq(t1, t0); for (i = 1; i < 5; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0);
613	8.67k	fe_sq(t1, t0); for (i = 1; i < 10; ++i) fe_sq(t1, t1); fe_mul(t1, t1, t0);
614	17.3k	fe_sq(t2, t1); for (i = 1; i < 20; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
615	8.67k	fe_sq(t1, t1); for (i = 1; i < 10; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0);
616	43.3k	fe_sq(t1, t0); for (i = 1; i < 50; ++i) fe_sq(t1, t1); fe_mul(t1, t1, t0);
617	86.7k	fe_sq(t2, t1); for (i = 1; i < 100; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
618	43.3k	fe_sq(t1, t1); for (i = 1; i < 50; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0);
		/* Two squarings give 2^252 - 4; multiplying by a gives 2^252 - 3. */
619	1.73k	fe_sq(t0, t0); for (i = 1; i < 2; ++i) fe_sq(t0, t0); fe_mul( r, t0, a);
620
621	867	return;
622	867	}
623
624		/* Double the square of a and put result in r. (r = 2 * a * a)
625		*
626		* r A field element that receives the result; r[1] to r[4] hold 51 bits each.
627		* a A field element.
628		* Every column sum of the square is doubled before the reduction by 19.
629		*/
630		void fe_sq2(fe r, const fe a)
631	80.9k	{
632	80.9k	const __int128_t k2 = 2;
633	80.9k	const __int128_t k19 = 19;
634	80.9k	__int128_t t0 = k2 * (((__int128_t)a[0]) * a[0]);
635	80.9k	__int128_t t1 = k2 * (((__int128_t)a[0]) * a[1] * k2);
636	80.9k	__int128_t t2 = k2 * (((__int128_t)a[0]) * a[2] * k2
637	80.9k	+ ((__int128_t)a[1]) * a[1]);
638	80.9k	__int128_t t3 = k2 * (((__int128_t)a[0]) * a[3] * k2
639	80.9k	+ ((__int128_t)a[1]) * a[2] * k2);
640	80.9k	__int128_t t4 = k2 * (((__int128_t)a[0]) * a[4] * k2
641	80.9k	+ ((__int128_t)a[1]) * a[3] * k2
642	80.9k	+ ((__int128_t)a[2]) * a[2]);
643	80.9k	__int128_t t5 = k2 * (((__int128_t)a[1]) * a[4] * k2
644	80.9k	+ ((__int128_t)a[2]) * a[3] * k2);
645	80.9k	__int128_t t6 = k2 * (((__int128_t)a[2]) * a[4] * k2
646	80.9k	+ ((__int128_t)a[3]) * a[3]);
647	80.9k	__int128_t t7 = k2 * (((__int128_t)a[3]) * a[4] * k2);
648	80.9k	__int128_t t8 = k2 * (((__int128_t)a[4]) * a[4]);
649
650		/* Modulo reduce double long word: columns 5 to 8 fold into columns 0 to 3 times 19. */
651	80.9k	t0 += t5 * k19;
652	80.9k	t1 += t6 * k19;
653	80.9k	t2 += t7 * k19;
654	80.9k	t3 += t8 * k19;
655
656		/* Normalize to 51-bits of data per word. */
657	80.9k	t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
658
659	80.9k	t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
660	80.9k	t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
661	80.9k	t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
662	80.9k	t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
		/* The last carry is folded into r[0], which is not masked again afterwards. */
663	80.9k	r[0] += (sword64)((t4 >> 51) * k19);
664	80.9k	r[4] = t4 & 0x7ffffffffffff;
665	80.9k	}
666
667		/* Load 3 little endian bytes into a 64-bit word.
668		*
669		* in An array of bytes; in[0] to in[2] are read, in[0] being least significant.
670		* returns a 64-bit word holding the 24-bit value.
671		*/
672		sword64 load_3(const unsigned char *in)
673	0	{
674	0	word64 result;
675
676	0	result = ((((word64)in[0]) ) \|
677	0	(((word64)in[1]) << 8) \|
678	0	(((word64)in[2]) << 16));
679
680	0	return (sword64)result;
681	0	}
682
683		/* Load 4 little endian bytes into a 64-bit word.
684		*
685		* in An array of bytes; in[0] to in[3] are read, in[0] being least significant.
686		* returns a 64-bit word holding the 32-bit value, never negative.
687		*/
688		sword64 load_4(const unsigned char *in)
689	0	{
690	0	word64 result;
691
692	0	result = ((((word64)in[0]) ) \|
693	0	(((word64)in[1]) << 8) \|
694	0	(((word64)in[2]) << 16) \|
695	0	(((word64)in[3]) << 24));
696
697	0	return (sword64)result;
698	0	}
699

Coverage Report

Created: 2026-05-18 06:53