/src/wolfssl-sp-math/wolfcrypt/src/fe_x25519_128.h
Line | Count | Source |
1 | | /* fe_x25519_128.h |
2 | | * |
3 | | * Copyright (C) 2006-2025 wolfSSL Inc. |
4 | | * |
5 | | * This file is part of wolfSSL. |
6 | | * |
7 | | * wolfSSL is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the GNU General Public License as published by |
9 | | * the Free Software Foundation; either version 3 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * wolfSSL is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU General Public License |
18 | | * along with this program; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA |
20 | | */ |
21 | | |
22 | | /* Generated using (from wolfssl): |
23 | | * cd ../scripts |
24 | | * ruby ./x25519/fe_x25519_128_gen.rb > ../wolfssl/wolfcrypt/src/fe_x25519_128.h |
25 | | */ |
26 | | |
27 | | void fe_init(void) /* Empty body: this implementation performs no initialization work. */ |
28 | 13.3k | { |
29 | 13.3k | } |
30 | | |
31 | | /* Unpack a little-endian array of 32 bytes into five words with 51-bits of |
32 | | * data in each word. The top bit of in[31] is masked off and ignored. |
33 | | * |
34 | | * in An array of 32 bytes (in[0] holds the least significant bits). |
35 | | * out An array of five words; out[0] is the least significant. |
36 | | */ |
37 | | void fe_frombytes(fe out, const unsigned char *in) |
38 | 13.3k | { |
39 | 13.3k | out[0] = (sword64)( |
40 | 13.3k | (((word64)((in[ 0] ) )) ) |
41 | 13.3k | | (((word64)((in[ 1] ) )) << 8) |
42 | 13.3k | | (((word64)((in[ 2] ) )) << 16) |
43 | 13.3k | | (((word64)((in[ 3] ) )) << 24) |
44 | 13.3k | | (((word64)((in[ 4] ) )) << 32) |
45 | 13.3k | | (((word64)((in[ 5] ) )) << 40) |
46 | 13.3k | | (((word64)((in[ 6] ) & 0x07)) << 48)); |
47 | 13.3k | out[1] = (sword64)( |
48 | 13.3k | (((word64)((in[ 6] >> 3) & 0x1f)) ) |
49 | 13.3k | | (((word64)((in[ 7] ) )) << 5) |
50 | 13.3k | | (((word64)((in[ 8] ) )) << 13) |
51 | 13.3k | | (((word64)((in[ 9] ) )) << 21) |
52 | 13.3k | | (((word64)((in[10] ) )) << 29) |
53 | 13.3k | | (((word64)((in[11] ) )) << 37) |
54 | 13.3k | | (((word64)((in[12] ) & 0x3f)) << 45)); |
55 | 13.3k | out[2] = (sword64)( |
56 | 13.3k | (((word64)((in[12] >> 6) & 0x03)) ) |
57 | 13.3k | | (((word64)((in[13] ) )) << 2) |
58 | 13.3k | | (((word64)((in[14] ) )) << 10) |
59 | 13.3k | | (((word64)((in[15] ) )) << 18) |
60 | 13.3k | | (((word64)((in[16] ) )) << 26) |
61 | 13.3k | | (((word64)((in[17] ) )) << 34) |
62 | 13.3k | | (((word64)((in[18] ) )) << 42) |
63 | 13.3k | | (((word64)((in[19] ) & 0x01)) << 50)); |
64 | 13.3k | out[3] = (sword64)( |
65 | 13.3k | (((word64)((in[19] >> 1) & 0x7f)) ) |
66 | 13.3k | | (((word64)((in[20] ) )) << 7) |
67 | 13.3k | | (((word64)((in[21] ) )) << 15) |
68 | 13.3k | | (((word64)((in[22] ) )) << 23) |
69 | 13.3k | | (((word64)((in[23] ) )) << 31) |
70 | 13.3k | | (((word64)((in[24] ) )) << 39) |
71 | 13.3k | | (((word64)((in[25] ) & 0x0f)) << 47)); |
72 | 13.3k | out[4] = (sword64)( |
73 | 13.3k | (((word64)((in[25] >> 4) & 0x0f)) ) |
74 | 13.3k | | (((word64)((in[26] ) )) << 4) |
75 | 13.3k | | (((word64)((in[27] ) )) << 12) |
76 | 13.3k | | (((word64)((in[28] ) )) << 20) |
77 | 13.3k | | (((word64)((in[29] ) )) << 28) |
78 | 13.3k | | (((word64)((in[30] ) )) << 36) |
79 | 13.3k | | (((word64)((in[31] ) & 0x7f)) << 44)); |
80 | 13.3k | } |
81 | | |
82 | | /* Convert a number represented as an array of words to an array of bytes. |
83 | | * The array of words is normalized to an array of 51-bit data words and if |
84 | | * greater than or equal to the mod, modulo reduced by the prime 2^255 - 19. |
85 | | * |
86 | | * n An array of words. Not modified; a local copy is normalized. |
87 | | * out An array of 32 bytes, written least significant byte first. |
88 | | */ |
89 | | void fe_tobytes(unsigned char *out, const fe n) |
90 | 32.4k | { |
91 | 32.4k | fe in; |
92 | 32.4k | sword64 c; |
93 | | |
94 | 32.4k | in[0] = n[0]; |
95 | 32.4k | in[1] = n[1]; |
96 | 32.4k | in[2] = n[2]; |
97 | 32.4k | in[3] = n[3]; |
98 | 32.4k | in[4] = n[4]; |
99 | | |
100 | | /* Normalize to 51-bits of data per word. */ |
101 | 32.4k | in[0] += (in[4] >> 51) * 19; in[4] &= 0x7ffffffffffff; |
102 | | |
103 | 32.4k | in[1] += in[0] >> 51; in[0] &= 0x7ffffffffffff; |
104 | 32.4k | in[2] += in[1] >> 51; in[1] &= 0x7ffffffffffff; |
105 | 32.4k | in[3] += in[2] >> 51; in[2] &= 0x7ffffffffffff; |
106 | 32.4k | in[4] += in[3] >> 51; in[3] &= 0x7ffffffffffff; |
107 | 32.4k | in[0] += (in[4] >> 51) * 19; |
108 | 32.4k | in[4] &= 0x7ffffffffffff; |
109 | | /* c becomes the carry out of word 4 of (in + 19); when it is 1, adding 19 and masking word 4 subtracts the prime. */ |
110 | 32.4k | c = (in[0] + 19) >> 51; |
111 | 32.4k | c = (in[1] + c) >> 51; |
112 | 32.4k | c = (in[2] + c) >> 51; |
113 | 32.4k | c = (in[3] + c) >> 51; |
114 | 32.4k | c = (in[4] + c) >> 51; |
115 | 32.4k | in[0] += c * 19; |
116 | 32.4k | in[1] += in[0] >> 51; in[0] &= 0x7ffffffffffff; |
117 | 32.4k | in[2] += in[1] >> 51; in[1] &= 0x7ffffffffffff; |
118 | 32.4k | in[3] += in[2] >> 51; in[2] &= 0x7ffffffffffff; |
119 | 32.4k | in[4] += in[3] >> 51; in[3] &= 0x7ffffffffffff; |
120 | 32.4k | in[4] &= 0x7ffffffffffff; |
121 | | /* Pack the five 51-bit words into 32 bytes; bytes 6, 12, 19 and 25 straddle two words. */ |
122 | 32.4k | out[ 0] = (((byte)((in[0] ) )) ); |
123 | 32.4k | out[ 1] = (((byte)((in[0] >> 8) )) ); |
124 | 32.4k | out[ 2] = (((byte)((in[0] >> 16) )) ); |
125 | 32.4k | out[ 3] = (((byte)((in[0] >> 24) )) ); |
126 | 32.4k | out[ 4] = (((byte)((in[0] >> 32) )) ); |
127 | 32.4k | out[ 5] = (((byte)((in[0] >> 40) )) ); |
128 | 32.4k | out[ 6] = (byte)((((byte)((in[0] >> 48) & 0x07)) ) |
129 | 32.4k | | (((byte)((in[1] ) & 0x1f)) << 3)); |
130 | 32.4k | out[ 7] = (((byte)((in[1] >> 5) )) ); |
131 | 32.4k | out[ 8] = (((byte)((in[1] >> 13) )) ); |
132 | 32.4k | out[ 9] = (((byte)((in[1] >> 21) )) ); |
133 | 32.4k | out[10] = (((byte)((in[1] >> 29) )) ); |
134 | 32.4k | out[11] = (((byte)((in[1] >> 37) )) ); |
135 | 32.4k | out[12] = (byte)((((byte)((in[1] >> 45) & 0x3f)) ) |
136 | 32.4k | | (((byte)((in[2] ) & 0x03)) << 6)); |
137 | 32.4k | out[13] = (((byte)((in[2] >> 2) )) ); |
138 | 32.4k | out[14] = (((byte)((in[2] >> 10) )) ); |
139 | 32.4k | out[15] = (((byte)((in[2] >> 18) )) ); |
140 | 32.4k | out[16] = (((byte)((in[2] >> 26) )) ); |
141 | 32.4k | out[17] = (((byte)((in[2] >> 34) )) ); |
142 | 32.4k | out[18] = (((byte)((in[2] >> 42) )) ); |
143 | 32.4k | out[19] = (byte)((((byte)((in[2] >> 50) & 0x01)) ) |
144 | 32.4k | | (((byte)((in[3] ) & 0x7f)) << 1)); |
145 | 32.4k | out[20] = (((byte)((in[3] >> 7) )) ); |
146 | 32.4k | out[21] = (((byte)((in[3] >> 15) )) ); |
147 | 32.4k | out[22] = (((byte)((in[3] >> 23) )) ); |
148 | 32.4k | out[23] = (((byte)((in[3] >> 31) )) ); |
149 | 32.4k | out[24] = (((byte)((in[3] >> 39) )) ); |
150 | 32.4k | out[25] = (byte)((((byte)((in[3] >> 47) & 0x0f)) ) |
151 | 32.4k | | (((byte)((in[4] ) & 0x0f)) << 4)); |
152 | 32.4k | out[26] = (((byte)((in[4] >> 4) )) ); |
153 | 32.4k | out[27] = (((byte)((in[4] >> 12) )) ); |
154 | 32.4k | out[28] = (((byte)((in[4] >> 20) )) ); |
155 | 32.4k | out[29] = (((byte)((in[4] >> 28) )) ); |
156 | 32.4k | out[30] = (((byte)((in[4] >> 36) )) ); |
157 | 32.4k | out[31] = (((byte)((in[4] >> 44) & 0x7f)) ); |
158 | 32.4k | } |
159 | | |
160 | | /* Set the field element to 1. |
161 | | * Word 0 is set to 1 and words 1 to 4 are set to 0. |
162 | | * n The field element to set. |
163 | | */ |
164 | | void fe_1(fe n) |
165 | 724k | { |
166 | 724k | n[0] = 0x0000000000001; |
167 | 724k | n[1] = 0x0000000000000; |
168 | 724k | n[2] = 0x0000000000000; |
169 | 724k | n[3] = 0x0000000000000; |
170 | 724k | n[4] = 0x0000000000000; |
171 | 724k | } |
172 | | |
173 | | /* Set the field element to 0. |
174 | | * All five words are set to 0. |
175 | | * n The field element to set. |
176 | | */ |
177 | | void fe_0(fe n) |
178 | 367k | { |
179 | 367k | n[0] = 0x0000000000000; |
180 | 367k | n[1] = 0x0000000000000; |
181 | 367k | n[2] = 0x0000000000000; |
182 | 367k | n[3] = 0x0000000000000; |
183 | 367k | n[4] = 0x0000000000000; |
184 | 367k | } |
185 | | |
186 | | /* Copy field element a into field element r. |
187 | | * All five words are copied word by word; a is not modified. |
188 | | * r Field element to copy into. |
189 | | * a Field element to copy. |
190 | | */ |
191 | | void fe_copy(fe r, const fe a) |
192 | 42.4k | { |
193 | 42.4k | r[0] = a[0]; |
194 | 42.4k | r[1] = a[1]; |
195 | 42.4k | r[2] = a[2]; |
196 | 42.4k | r[3] = a[3]; |
197 | 42.4k | r[4] = a[4]; |
198 | 42.4k | } |
199 | | |
200 | | /* Constant time, conditional swap of field elements f and g. |
201 | | * |
202 | | * f A field element. |
203 | | * g A field element. |
204 | | * b If 1 then swap and if 0 then don't swap. |
205 | | */ |
206 | | void fe_cswap(fe f, fe g, int b) |
207 | 4.37M | { |
208 | 4.37M | sword64 m = b; |
209 | 4.37M | sword64 t0, t1, t2, t3, t4; |
210 | | |
211 | | /* Convert conditional into mask: 0 stays 0 and 1 becomes -1. */ |
212 | 4.37M | m = -m; |
213 | 4.37M | t0 = m & (f[0] ^ g[0]); |
214 | 4.37M | t1 = m & (f[1] ^ g[1]); |
215 | 4.37M | t2 = m & (f[2] ^ g[2]); |
216 | 4.37M | t3 = m & (f[3] ^ g[3]); |
217 | 4.37M | t4 = m & (f[4] ^ g[4]); |
218 | | /* XORing the masked difference into both elements swaps them when b is 1 and changes nothing when b is 0. */ |
219 | 4.37M | f[0] ^= t0; |
220 | 4.37M | f[1] ^= t1; |
221 | 4.37M | f[2] ^= t2; |
222 | 4.37M | f[3] ^= t3; |
223 | 4.37M | f[4] ^= t4; |
224 | | |
225 | 4.37M | g[0] ^= t0; |
226 | 4.37M | g[1] ^= t1; |
227 | 4.37M | g[2] ^= t2; |
228 | 4.37M | g[3] ^= t3; |
229 | 4.37M | g[4] ^= t4; |
230 | 4.37M | } |
231 | | |
232 | | /* Subtract b from a into r. (r = a - b) |
233 | | * Words are subtracted pairwise with no borrow propagation or modular reduction. |
234 | | * r A field element. |
235 | | * a A field element. |
236 | | * b A field element. |
237 | | */ |
238 | | void fe_sub(fe r, const fe a, const fe b) |
239 | 7.12M | { |
240 | 7.12M | r[0] = a[0] - b[0]; |
241 | 7.12M | r[1] = a[1] - b[1]; |
242 | 7.12M | r[2] = a[2] - b[2]; |
243 | 7.12M | r[3] = a[3] - b[3]; |
244 | 7.12M | r[4] = a[4] - b[4]; |
245 | 7.12M | } |
246 | | |
247 | | /* Add b to a into r. (r = a + b) |
248 | | * Words are added pairwise with no carry propagation or modular reduction. |
249 | | * r A field element. |
250 | | * a A field element. |
251 | | * b A field element. |
252 | | */ |
253 | | void fe_add(fe r, const fe a, const fe b) |
254 | 7.11M | { |
255 | 7.11M | r[0] = a[0] + b[0]; |
256 | 7.11M | r[1] = a[1] + b[1]; |
257 | 7.11M | r[2] = a[2] + b[2]; |
258 | 7.11M | r[3] = a[3] + b[3]; |
259 | 7.11M | r[4] = a[4] + b[4]; |
260 | 7.11M | } |
261 | | |
262 | | /* Multiply a and b into r. (r = a * b) |
263 | | * All products are formed in 128-bit temporaries before r is written, so r may alias a or b. |
264 | | * r A field element. |
265 | | * a A field element. |
266 | | * b A field element. |
267 | | */ |
268 | | void fe_mul(fe r, const fe a, const fe b) |
269 | 10.5M | { |
270 | 10.5M | const __int128_t k19 = 19; |
271 | 10.5M | __int128_t t0 = ((__int128_t)a[0]) * b[0]; |
272 | 10.5M | __int128_t t1 = ((__int128_t)a[0]) * b[1] |
273 | 10.5M | + ((__int128_t)a[1]) * b[0]; |
274 | 10.5M | __int128_t t2 = ((__int128_t)a[0]) * b[2] |
275 | 10.5M | + ((__int128_t)a[1]) * b[1] |
276 | 10.5M | + ((__int128_t)a[2]) * b[0]; |
277 | 10.5M | __int128_t t3 = ((__int128_t)a[0]) * b[3] |
278 | 10.5M | + ((__int128_t)a[1]) * b[2] |
279 | 10.5M | + ((__int128_t)a[2]) * b[1] |
280 | 10.5M | + ((__int128_t)a[3]) * b[0]; |
281 | 10.5M | __int128_t t4 = ((__int128_t)a[0]) * b[4] |
282 | 10.5M | + ((__int128_t)a[1]) * b[3] |
283 | 10.5M | + ((__int128_t)a[2]) * b[2] |
284 | 10.5M | + ((__int128_t)a[3]) * b[1] |
285 | 10.5M | + ((__int128_t)a[4]) * b[0]; |
286 | 10.5M | __int128_t t5 = ((__int128_t)a[1]) * b[4] |
287 | 10.5M | + ((__int128_t)a[2]) * b[3] |
288 | 10.5M | + ((__int128_t)a[3]) * b[2] |
289 | 10.5M | + ((__int128_t)a[4]) * b[1]; |
290 | 10.5M | __int128_t t6 = ((__int128_t)a[2]) * b[4] |
291 | 10.5M | + ((__int128_t)a[3]) * b[3] |
292 | 10.5M | + ((__int128_t)a[4]) * b[2]; |
293 | 10.5M | __int128_t t7 = ((__int128_t)a[3]) * b[4] |
294 | 10.5M | + ((__int128_t)a[4]) * b[3]; |
295 | 10.5M | __int128_t t8 = ((__int128_t)a[4]) * b[4]; |
296 | | |
297 | | /* Modulo reduce double long word: words 5 to 8 are folded into words 0 to 3 multiplied by 19. */ |
298 | 10.5M | t0 += t5 * k19; |
299 | 10.5M | t1 += t6 * k19; |
300 | 10.5M | t2 += t7 * k19; |
301 | 10.5M | t3 += t8 * k19; |
302 | | |
303 | | /* Normalize to 51-bits of data per word. The last carry out of word 4 is added to r[0] (times 19) and not propagated further. */ |
304 | 10.5M | t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff; |
305 | | |
306 | 10.5M | t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff; |
307 | 10.5M | t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff; |
308 | 10.5M | t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff; |
309 | 10.5M | t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff; |
310 | 10.5M | r[0] += (sword64)((t4 >> 51) * k19); |
311 | 10.5M | r[4] = t4 & 0x7ffffffffffff; |
312 | 10.5M | } |
313 | | |
314 | | /* Square a and put result in r. (r = a * a) |
315 | | * |
316 | | * r A field element. |
317 | | * a A field element. |
318 | | * Cross products a[i] * a[j] with i != j are computed once and doubled. |
319 | | */ |
320 | | void fe_sq(fe r, const fe a) |
321 | 9.87M | { |
322 | 9.87M | const __int128_t k19 = 19; |
323 | 9.87M | const __int128_t k2 = 2; |
324 | 9.87M | __int128_t t0 = ((__int128_t)a[0]) * a[0]; |
325 | 9.87M | __int128_t t1 = ((__int128_t)a[0]) * a[1] * k2; |
326 | 9.87M | __int128_t t2 = ((__int128_t)a[0]) * a[2] * k2 |
327 | 9.87M | + ((__int128_t)a[1]) * a[1]; |
328 | 9.87M | __int128_t t3 = ((__int128_t)a[0]) * a[3] * k2 |
329 | 9.87M | + ((__int128_t)a[1]) * a[2] * k2; |
330 | 9.87M | __int128_t t4 = ((__int128_t)a[0]) * a[4] * k2 |
331 | 9.87M | + ((__int128_t)a[1]) * a[3] * k2 |
332 | 9.87M | + ((__int128_t)a[2]) * a[2]; |
333 | 9.87M | __int128_t t5 = ((__int128_t)a[1]) * a[4] * k2 |
334 | 9.87M | + ((__int128_t)a[2]) * a[3] * k2; |
335 | 9.87M | __int128_t t6 = ((__int128_t)a[2]) * a[4] * k2 |
336 | 9.87M | + ((__int128_t)a[3]) * a[3]; |
337 | 9.87M | __int128_t t7 = ((__int128_t)a[3]) * a[4] * k2; |
338 | 9.87M | __int128_t t8 = ((__int128_t)a[4]) * a[4]; |
339 | | |
340 | | /* Modulo reduce double long word: words 5 to 8 are folded into words 0 to 3 multiplied by 19. */ |
341 | 9.87M | t0 += t5 * k19; |
342 | 9.87M | t1 += t6 * k19; |
343 | 9.87M | t2 += t7 * k19; |
344 | 9.87M | t3 += t8 * k19; |
345 | | |
346 | | /* Normalize to 51-bits of data per word. The last carry out of word 4 is added to r[0] (times 19) and not propagated further. */ |
347 | 9.87M | t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff; |
348 | | |
349 | 9.87M | t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff; |
350 | 9.87M | t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff; |
351 | 9.87M | t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff; |
352 | 9.87M | t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff; |
353 | 9.87M | r[0] += (sword64)((t4 >> 51) * k19); |
354 | 9.87M | r[4] = t4 & 0x7ffffffffffff; |
355 | 9.87M | } |
356 | | |
357 | | /* Multiply a by 121666 and put result in r. (r = 121666 * a) |
358 | | * |
359 | | * r A field element. |
360 | | * a A field element. Only read here, although the parameter is not const-qualified. |
361 | | * Each word of a is multiplied by 121666 and carries are then propagated. |
362 | | */ |
363 | | void fe_mul121666(fe r, fe a) |
364 | 1.00M | { |
365 | 1.00M | const __int128_t k19 = 19; |
366 | 1.00M | const __int128_t k121666 = 121666; |
367 | 1.00M | __int128_t t0 = ((__int128_t)a[0]) * k121666; |
368 | 1.00M | __int128_t t1 = ((__int128_t)a[1]) * k121666; |
369 | 1.00M | __int128_t t2 = ((__int128_t)a[2]) * k121666; |
370 | 1.00M | __int128_t t3 = ((__int128_t)a[3]) * k121666; |
371 | 1.00M | __int128_t t4 = ((__int128_t)a[4]) * k121666; |
372 | | |
373 | | /* Normalize to 51-bits of data per word. The last carry out of word 4 is added to r[0] (times 19) and not propagated further. */ |
374 | 1.00M | t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff; |
375 | | |
376 | 1.00M | t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff; |
377 | 1.00M | t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff; |
378 | 1.00M | t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff; |
379 | 1.00M | t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff; |
380 | 1.00M | r[0] += (sword64)((t4 >> 51) * k19); |
381 | 1.00M | r[4] = t4 & 0x7ffffffffffff; |
382 | 1.00M | } |
383 | | |
384 | | /* Find the inverse of a modulo 2^255 - 19 and put result in r. |
385 | | * (r * a) mod (2^255 - 19) = 1 |
386 | | * Implementation is constant time. |
387 | | * |
388 | | * r A field element. |
389 | | * a A field element. |
390 | | */ |
391 | | void fe_invert(fe r, const fe a) |
392 | 11.4k | { |
393 | 11.4k | fe t0, t1, t2, t3; |
394 | 11.4k | int i; |
395 | | /* Each line does one fe_sq() plus (bound - 1) more in its loop; a loop with bound 1 never executes. */ |
396 | | /* a ^ (2^255 - 21), that is a ^ (p - 2) for p = 2^255 - 19 */ |
397 | 11.4k | fe_sq(t0, a); for (i = 1; i < 1; ++i) fe_sq(t0, t0); |
398 | 22.9k | fe_sq(t1, t0); for (i = 1; i < 2; ++i) fe_sq(t1, t1); fe_mul(t1, a, t1); |
399 | 11.4k | fe_mul(t0, t0, t1); |
400 | 11.4k | fe_sq(t2, t0); for (i = 1; i < 1; ++i) fe_sq(t2, t2); fe_mul(t1, t1, t2); |
401 | 57.3k | fe_sq(t2, t1); for (i = 1; i < 5; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1); |
402 | 114k | fe_sq(t2, t1); for (i = 1; i < 10; ++i) fe_sq(t2, t2); fe_mul(t2, t2, t1); |
403 | 229k | fe_sq(t3, t2); for (i = 1; i < 20; ++i) fe_sq(t3, t3); fe_mul(t2, t3, t2); |
404 | 114k | fe_sq(t2, t2); for (i = 1; i < 10; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1); |
405 | 573k | fe_sq(t2, t1); for (i = 1; i < 50; ++i) fe_sq(t2, t2); fe_mul(t2, t2, t1); |
406 | 1.14M | fe_sq(t3, t2); for (i = 1; i < 100; ++i) fe_sq(t3, t3); fe_mul(t2, t3, t2); |
407 | 573k | fe_sq(t2, t2); for (i = 1; i < 50; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1); |
408 | 57.3k | fe_sq(t1, t1); for (i = 1; i < 5; ++i) fe_sq(t1, t1); fe_mul( r, t1, t0); |
409 | 11.4k | } |
410 | | |
411 | | #ifndef CURVE25519_SMALL |
412 | | #ifndef WOLFSSL_CURVE25519_BLINDING |
413 | | /* Scalar multiply the field element a by n using Montgomery Ladder and places |
414 | | * result in r. Bits 254 down to 0 of n are processed; always returns 0. |
415 | | * |
416 | | * r A field element as an array of bytes. |
417 | | * n The scalar as an array of bytes. |
418 | | * a A field element as an array of bytes. |
419 | | */ |
420 | | int curve25519(byte* r, const byte* n, const byte* a) |
421 | | { |
422 | | fe x1, x2, z2, x3, z3; |
423 | | fe t0, t1; |
424 | | int pos; |
425 | | unsigned int swap; |
426 | | unsigned int b; |
427 | | |
428 | | fe_frombytes(x1, a); |
429 | | fe_1(x2); |
430 | | fe_0(z2); |
431 | | fe_copy(x3, x1); |
432 | | fe_1(z3); |
433 | | /* swap holds the previous scalar bit, so (x2,z2) and (x3,z3) are exchanged only when consecutive bits differ. */ |
434 | | swap = 0; |
435 | | for (pos = 254;pos >= 0;--pos) { |
436 | | b = (unsigned int)(n[pos / 8] >> (pos & 7)); |
437 | | b &= 1; |
438 | | swap ^= b; |
439 | | fe_cswap(x2, x3, (int)swap); |
440 | | fe_cswap(z2, z3, (int)swap); |
441 | | swap = b; |
442 | | |
443 | | fe_sub(t0, x3, z3); |
444 | | fe_sub(t1, x2, z2); |
445 | | fe_add(x2, x2, z2); |
446 | | fe_add(z2, x3, z3); |
447 | | fe_mul(z3, t0, x2); |
448 | | fe_mul(z2, z2, t1); |
449 | | fe_sq(t0, t1); |
450 | | fe_sq(t1, x2); |
451 | | fe_add(x3, z3, z2); |
452 | | fe_sub(z2, z3, z2); |
453 | | fe_mul(x2, t1, t0); |
454 | | fe_sub(t1, t1, t0); |
455 | | fe_sq(z2, z2); |
456 | | fe_mul121666(z3, t1); |
457 | | fe_sq(x3, x3); |
458 | | fe_add(t0, t0, z3); |
459 | | fe_mul(z3, x1, z2); |
460 | | fe_mul(z2, t1, t0); |
461 | | } |
462 | | fe_cswap(x2, x3, (int)swap); |
463 | | fe_cswap(z2, z3, (int)swap); |
464 | | /* r = x2 / z2, converted to bytes. */ |
465 | | fe_invert(z2, z2); |
466 | | fe_mul(x2, x2, z2); |
467 | | fe_tobytes(r, x2); |
468 | | |
469 | | return 0; |
470 | | } |
471 | | #else |
472 | | int curve25519_blind(byte* r, const byte* n, const byte* mask, const byte* a, |
473 | | const byte* rz) |
474 | 3.92k | { |
475 | 3.92k | fe x1, x2, z2, x3, z3; |
476 | 3.92k | fe t0, t1; |
477 | 3.92k | int pos; |
478 | 3.92k | unsigned int b; |
479 | | /* Starts from (x3, z3) = (x1 * rz, rz). Swaps are driven alternately by bits of mask and of n; how n relates to mask is not visible here. Always returns 0. */ |
480 | 3.92k | fe_frombytes(x1, a); |
481 | 3.92k | fe_1(x2); |
482 | 3.92k | fe_0(z2); |
483 | 3.92k | fe_copy(x3, x1); |
484 | 3.92k | fe_frombytes(z3, rz); |
485 | 3.92k | fe_mul(x3, x3, z3); |
486 | | |
487 | | /* mask_bits[255]: the top bit of mask[31] */ |
488 | 3.92k | b = (unsigned int)(mask[31] >> 7); |
489 | 3.92k | b &= 1; |
490 | 3.92k | fe_cswap(x2,x3,(int)b); |
491 | 3.92k | fe_cswap(z2,z3,(int)b); |
492 | 1.00M | for (pos = 255;pos >= 1;--pos) { |
493 | 1.00M | b = (unsigned int)(n[pos / 8] >> (pos & 7)); |
494 | 1.00M | b &= 1; |
495 | 1.00M | fe_cswap(x2, x3, (int)b); |
496 | 1.00M | fe_cswap(z2, z3, (int)b); |
497 | | |
498 | | /* montgomery */ |
499 | 1.00M | fe_sub(t0, x3, z3); |
500 | 1.00M | fe_sub(t1, x2, z2); |
501 | 1.00M | fe_add(x2, x2, z2); |
502 | 1.00M | fe_add(z2, x3, z3); |
503 | 1.00M | fe_mul(z3, t0, x2); |
504 | 1.00M | fe_mul(z2, z2, t1); |
505 | 1.00M | fe_sq(t0, t1); |
506 | 1.00M | fe_sq(t1, x2); |
507 | 1.00M | fe_add(x3, z3, z2); |
508 | 1.00M | fe_sub(z2, z3, z2); |
509 | 1.00M | fe_mul(x2, t1, t0); |
510 | 1.00M | fe_sub(t1, t1, t0); |
511 | 1.00M | fe_sq(z2, z2); |
512 | 1.00M | fe_mul121666(z3, t1); |
513 | 1.00M | fe_sq(x3, x3); |
514 | 1.00M | fe_add(t0, t0, z3); |
515 | 1.00M | fe_mul(z3, x1, z2); |
516 | 1.00M | fe_mul(z2, t1, t0); |
517 | | /* Swap again using mask bit (pos - 1), so mask bits 254 down to 0 are used across the loop. */ |
518 | 1.00M | b = (unsigned int)(mask[(pos - 1) / 8] >> ((pos - 1) & 7)); |
519 | 1.00M | b &= 1; |
520 | 1.00M | fe_cswap(x2, x3, (int)b); |
521 | 1.00M | fe_cswap(z2, z3, (int)b); |
522 | 1.00M | } |
523 | 3.92k | b = (unsigned int)(n[0] & 1); |
524 | 3.92k | fe_cswap(x2, x3, (int)b); |
525 | 3.92k | fe_cswap(z2, z3, (int)b); |
526 | | /* r = x2 / z2, converted to bytes. */ |
527 | 3.92k | fe_invert(z2, z2); |
528 | 3.92k | fe_mul(x2, x2, z2); |
529 | 3.92k | fe_tobytes(r, x2); |
530 | | |
531 | 3.92k | return 0; |
532 | 3.92k | } |
533 | | #endif /* WOLFSSL_CURVE25519_BLINDING */ |
534 | | #endif /* !CURVE25519_SMALL */ |
535 | | |
536 | | /* The field element value 0 as an array of 32 bytes; fe_isnonzero() compares against it. */ |
537 | | static const unsigned char zero[32] = {0}; |
538 | | |
539 | | /* Constant time check as to whether a is not 0. |
540 | | * a is converted with fe_tobytes() and the 32 bytes are compared against zero[] with ConstantCompare(). |
541 | | * a A field element. Returns the ConstantCompare() result - presumably 0 only when a is 0; confirm against ConstantCompare(). |
542 | | */ |
543 | | int fe_isnonzero(const fe a) |
544 | 8.09k | { |
545 | 8.09k | unsigned char s[32]; |
546 | 8.09k | fe_tobytes(s, a); |
547 | 8.09k | return ConstantCompare(s, zero, 32); |
548 | 8.09k | } |
549 | | |
550 | | /* Checks whether a is negative. |
551 | | * Returns the lowest bit of the first byte produced by fe_tobytes(a): 1 means negative, 0 means not. |
552 | | * a A field element. |
553 | | */ |
554 | | int fe_isnegative(const fe a) |
555 | 12.8k | { |
556 | 12.8k | unsigned char s[32]; |
557 | 12.8k | fe_tobytes(s, a); |
558 | 12.8k | return s[0] & 1; |
559 | 12.8k | } |
560 | | |
561 | | /* Negates field element a and stores the result in r. |
562 | | * Each word is negated individually; no reduction or normalization is done. |
563 | | * r A field element. |
564 | | * a A field element. |
565 | | */ |
566 | | void fe_neg(fe r, const fe a) |
567 | 358k | { |
568 | 358k | r[0] = -a[0]; |
569 | 358k | r[1] = -a[1]; |
570 | 358k | r[2] = -a[2]; |
571 | 358k | r[3] = -a[3]; |
572 | 358k | r[4] = -a[4]; |
573 | 358k | } |
574 | | |
575 | | /* Constant time, conditional move of g into f. |
576 | | * f is not changed if the condition is 0. |
577 | | * |
578 | | * f A field element. |
579 | | * g A field element. |
580 | | * b If 1 then copy and if 0 then don't copy. |
581 | | */ |
582 | | void fe_cmov(fe f, const fe g, int b) |
583 | 8.89M | { |
584 | 8.89M | sword64 m = b; |
585 | 8.89M | sword64 t0, t1, t2, t3, t4; |
586 | | |
587 | | /* Convert conditional into mask: 0 stays 0 and 1 becomes -1. */ |
588 | 8.89M | m = -m; |
589 | 8.89M | t0 = m & (f[0] ^ g[0]); |
590 | 8.89M | t1 = m & (f[1] ^ g[1]); |
591 | 8.89M | t2 = m & (f[2] ^ g[2]); |
592 | 8.89M | t3 = m & (f[3] ^ g[3]); |
593 | 8.89M | t4 = m & (f[4] ^ g[4]); |
594 | | /* XORing the masked difference into f turns it into g when b is 1 and leaves it unchanged when b is 0. */ |
595 | 8.89M | f[0] ^= t0; |
596 | 8.89M | f[1] ^= t1; |
597 | 8.89M | f[2] ^= t2; |
598 | 8.89M | f[3] ^= t3; |
599 | 8.89M | f[4] ^= t4; |
600 | 8.89M | } |
601 | | |
602 | | void fe_pow22523(fe r, const fe a) |
603 | 5.47k | { |
604 | 5.47k | fe t0, t1, t2; |
605 | 5.47k | int i; |
606 | | /* Each line does one fe_sq() plus (bound - 1) more in its loop; a loop with bound 1 never executes. */ |
607 | | /* a ^ (2^252 - 3), that is a ^ ((p - 5) / 8) for p = 2^255 - 19 */ |
608 | 5.47k | fe_sq(t0, a); for (i = 1; i < 1; ++i) fe_sq(t0, t0); |
609 | 10.9k | fe_sq(t1, t0); for (i = 1; i < 2; ++i) fe_sq(t1, t1); fe_mul(t1, a, t1); |
610 | 5.47k | fe_mul(t0, t0, t1); |
611 | 5.47k | fe_sq(t0, t0); for (i = 1; i < 1; ++i) fe_sq(t0, t0); fe_mul(t0, t1, t0); |
612 | 27.3k | fe_sq(t1, t0); for (i = 1; i < 5; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0); |
613 | 54.7k | fe_sq(t1, t0); for (i = 1; i < 10; ++i) fe_sq(t1, t1); fe_mul(t1, t1, t0); |
614 | 109k | fe_sq(t2, t1); for (i = 1; i < 20; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1); |
615 | 54.7k | fe_sq(t1, t1); for (i = 1; i < 10; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0); |
616 | 273k | fe_sq(t1, t0); for (i = 1; i < 50; ++i) fe_sq(t1, t1); fe_mul(t1, t1, t0); |
617 | 547k | fe_sq(t2, t1); for (i = 1; i < 100; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1); |
618 | 273k | fe_sq(t1, t1); for (i = 1; i < 50; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0); |
619 | 10.9k | fe_sq(t0, t0); for (i = 1; i < 2; ++i) fe_sq(t0, t0); fe_mul( r, t0, a); |
620 | | |
621 | 5.47k | return; |
622 | 5.47k | } |
623 | | |
624 | | /* Double the square of a and put result in r. (r = 2 * a * a) |
625 | | * |
626 | | * r A field element. |
627 | | * a A field element. |
628 | | * Every product word is doubled in its 128-bit form before reduction. |
629 | | */ |
630 | | void fe_sq2(fe r, const fe a) |
631 | 522k | { |
632 | 522k | const __int128_t k2 = 2; |
633 | 522k | const __int128_t k19 = 19; |
634 | 522k | __int128_t t0 = k2 * (((__int128_t)a[0]) * a[0]); |
635 | 522k | __int128_t t1 = k2 * (((__int128_t)a[0]) * a[1] * k2); |
636 | 522k | __int128_t t2 = k2 * (((__int128_t)a[0]) * a[2] * k2 |
637 | 522k | + ((__int128_t)a[1]) * a[1]); |
638 | 522k | __int128_t t3 = k2 * (((__int128_t)a[0]) * a[3] * k2 |
639 | 522k | + ((__int128_t)a[1]) * a[2] * k2); |
640 | 522k | __int128_t t4 = k2 * (((__int128_t)a[0]) * a[4] * k2 |
641 | 522k | + ((__int128_t)a[1]) * a[3] * k2 |
642 | 522k | + ((__int128_t)a[2]) * a[2]); |
643 | 522k | __int128_t t5 = k2 * (((__int128_t)a[1]) * a[4] * k2 |
644 | 522k | + ((__int128_t)a[2]) * a[3] * k2); |
645 | 522k | __int128_t t6 = k2 * (((__int128_t)a[2]) * a[4] * k2 |
646 | 522k | + ((__int128_t)a[3]) * a[3]); |
647 | 522k | __int128_t t7 = k2 * (((__int128_t)a[3]) * a[4] * k2); |
648 | 522k | __int128_t t8 = k2 * (((__int128_t)a[4]) * a[4]); |
649 | | |
650 | | /* Modulo reduce double long word: words 5 to 8 are folded into words 0 to 3 multiplied by 19. */ |
651 | 522k | t0 += t5 * k19; |
652 | 522k | t1 += t6 * k19; |
653 | 522k | t2 += t7 * k19; |
654 | 522k | t3 += t8 * k19; |
655 | | |
656 | | /* Normalize to 51-bits of data per word. The last carry out of word 4 is added to r[0] (times 19) and not propagated further. */ |
657 | 522k | t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff; |
658 | | |
659 | 522k | t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff; |
660 | 522k | t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff; |
661 | 522k | t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff; |
662 | 522k | t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff; |
663 | 522k | r[0] += (sword64)((t4 >> 51) * k19); |
664 | 522k | r[4] = t4 & 0x7ffffffffffff; |
665 | 522k | } |
666 | | |
667 | | /* Load 3 little endian bytes into a 64-bit word. |
668 | | * |
669 | | * in An array of at least 3 bytes; in[0] is the least significant. |
670 | | * returns a signed 64-bit word with bits 24 and above clear. |
671 | | */ |
672 | | sword64 load_3(const unsigned char *in) |
673 | 0 | { |
674 | 0 | word64 result; |
675 |

676 | 0 | result = ((((word64)in[0]) ) | |
677 | 0 | (((word64)in[1]) << 8) | |
678 | 0 | (((word64)in[2]) << 16)); |
679 |

680 | 0 | return (sword64)result; |
681 | 0 | } |
682 | | |
683 | | /* Load 4 little endian bytes into a 64-bit word. |
684 | | * |
685 | | * in An array of at least 4 bytes; in[0] is the least significant. |
686 | | * returns a signed 64-bit word with bits 32 and above clear. |
687 | | */ |
688 | | sword64 load_4(const unsigned char *in) |
689 | 0 | { |
690 | 0 | word64 result; |
691 |

692 | 0 | result = ((((word64)in[0]) ) | |
693 | 0 | (((word64)in[1]) << 8) | |
694 | 0 | (((word64)in[2]) << 16) | |
695 | 0 | (((word64)in[3]) << 24)); |
696 |

697 | 0 | return (sword64)result; |
698 | 0 | } |
699 | | |