/src/wolfssl-normal-math/wolfcrypt/src/fe_448.c
Line | Count | Source |
1 | | /* fe_448.c |
2 | | * |
3 | | * Copyright (C) 2006-2025 wolfSSL Inc. |
4 | | * |
5 | | * This file is part of wolfSSL. |
6 | | * |
7 | | * wolfSSL is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the GNU General Public License as published by |
9 | | * the Free Software Foundation; either version 3 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * wolfSSL is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU General Public License |
18 | | * along with this program; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA |
20 | | */ |
21 | | |
22 | | /* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work. |
23 | | * Small implementation based on Daniel Beer's curve25519 public domain work. |
24 | | * Reworked for curve448 by Sean Parkinson. |
25 | | */ |
26 | | |
27 | | #include <wolfssl/wolfcrypt/libwolfssl_sources.h> |
28 | | |
29 | | #if defined(HAVE_CURVE448) || defined(HAVE_ED448) |
30 | | |
31 | | #include <wolfssl/wolfcrypt/fe_448.h> |
32 | | |
33 | | #ifdef NO_INLINE |
34 | | #include <wolfssl/wolfcrypt/misc.h> |
35 | | #else |
36 | | #define WOLFSSL_MISC_INCLUDED |
37 | | #include <wolfcrypt/src/misc.c> |
38 | | #endif |
39 | | |
40 | | #if defined(CURVE448_SMALL) || defined(ED448_SMALL) |
41 | | |
42 | | /* Initialize the field element operations. |
43 | | */ |
44 | | void fe448_init(void) |
45 | | { |
46 | | } |
47 | | |
48 | | /* Normalize the field element. |
49 | | * Ensure result is in range: 0..2^448-2^224-2 |
50 | | * |
51 | | * a [in] Field element in range 0..2^448-1. |
52 | | */ |
53 | | void fe448_norm(word8* a) |
54 | | { |
55 | | int i; |
56 | | sword16 c = 0; |
57 | | sword16 o = 0; |
58 | | |
59 | | for (i = 0; i < 56; i++) { |
60 | | c += a[i]; |
61 | | if ((i == 0) || (i == 28)) |
62 | | c += 1; |
63 | | c >>= 8; |
64 | | } |
65 | | |
66 | | for (i = 0; i < 56; i++) { |
67 | | if ((i == 0) || (i == 28)) o += c; |
68 | | o += a[i]; |
69 | | a[i] = (word8)o; |
70 | | o >>= 8; |
71 | | } |
72 | | } |
73 | | |
74 | | /* Copy one field element into another: d = a. |
75 | | * |
76 | | * d [in] Destination field element. |
77 | | * a [in] Source field element. |
78 | | */ |
79 | | void fe448_copy(word8* d, const word8* a) |
80 | | { |
81 | | int i; |
82 | | for (i = 0; i < 56; i++) { |
83 | | d[i] = a[i]; |
84 | | } |
85 | | } |
86 | | |
87 | | /* Conditionally swap the elements. |
88 | | * Constant time implementation. |
89 | | * |
90 | | * a [in] First field element. |
91 | | * b [in] Second field element. |
92 | | * c [in] Swap when 1. Valid values: 0, 1. |
93 | | */ |
94 | | static void fe448_cswap(word8* a, word8* b, int c) |
95 | | { |
96 | | int i; |
97 | | word8 mask = -(word8)c; |
98 | | word8 t[56]; |
99 | | |
100 | | for (i = 0; i < 56; i++) |
101 | | t[i] = (a[i] ^ b[i]) & mask; |
102 | | for (i = 0; i < 56; i++) |
103 | | a[i] ^= t[i]; |
104 | | for (i = 0; i < 56; i++) |
105 | | b[i] ^= t[i]; |
106 | | } |
107 | | |
108 | | /* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1) |
109 | | * |
110 | | * r [in] Field element to hold sum. |
111 | | * a [in] Field element to add. |
112 | | * b [in] Field element to add. |
113 | | */ |
114 | | void fe448_add(word8* r, const word8* a, const word8* b) |
115 | | { |
116 | | int i; |
117 | | sword16 c = 0; |
118 | | sword16 o = 0; |
119 | | |
120 | | for (i = 0; i < 56; i++) { |
121 | | c += a[i]; |
122 | | c += b[i]; |
123 | | r[i] = (word8)c; |
124 | | c >>= 8; |
125 | | } |
126 | | |
127 | | for (i = 0; i < 56; i++) { |
128 | | if ((i == 0) || (i == 28)) o += c; |
129 | | o += r[i]; |
130 | | r[i] = (word8)o; |
131 | | o >>= 8; |
132 | | } |
133 | | } |
134 | | |
135 | | /* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1) |
136 | | * |
137 | | * r [in] Field element to hold difference. |
138 | | * a [in] Field element to subtract from. |
139 | | * b [in] Field element to subtract. |
140 | | */ |
141 | | void fe448_sub(word8* r, const word8* a, const word8* b) |
142 | | { |
143 | | int i; |
144 | | sword16 c = 0; |
145 | | sword16 o = 0; |
146 | | |
147 | | for (i = 0; i < 56; i++) { |
148 | | if (i == 28) |
149 | | c += 0x1fc; |
150 | | else |
151 | | c += 0x1fe; |
152 | | c += a[i]; |
153 | | c -= b[i]; |
154 | | r[i] = (word8)c; |
155 | | c >>= 8; |
156 | | } |
157 | | |
158 | | for (i = 0; i < 56; i++) { |
159 | | if ((i == 0) || (i == 28)) o += c; |
160 | | o += r[i]; |
161 | | r[i] = (word8)o; |
162 | | o >>= 8; |
163 | | } |
164 | | } |
165 | | |
166 | | /* Multiply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1) |
167 | | * |
168 | | * r [in] Field element to hold result. |
169 | | * a [in] Field element to multiply. |
170 | | */ |
171 | | void fe448_mul39081(word8* r, const word8* a) |
172 | | { |
173 | | int i; |
174 | | sword32 c = 0; |
175 | | sword32 o = 0; |
176 | | |
177 | | for (i = 0; i < 56; i++) { |
178 | | c += a[i] * (sword32)39081; |
179 | | r[i] = (word8)c; |
180 | | c >>= 8; |
181 | | } |
182 | | |
183 | | for (i = 0; i < 56; i++) { |
184 | | if ((i == 0) || (i == 28)) o += c; |
185 | | o += r[i]; |
186 | | r[i] = (word8)o; |
187 | | o >>= 8; |
188 | | } |
189 | | } |
190 | | |
191 | | /* Multiply two field elements. r = (a * b) mod (2^448 - 2^224 - 1) |
192 | | * |
193 | | * r [in] Field element to hold result. |
194 | | * a [in] Field element to multiply. |
195 | | * b [in] Field element to multiply. |
196 | | */ |
197 | | void fe448_mul(word8* r, const word8* a, const word8* b) |
198 | | { |
199 | | int i, k; |
200 | | sword32 c = 0; |
201 | | sword16 o = 0, cc = 0; |
202 | | word8 t[112]; |
203 | | |
204 | | for (k = 0; k < 56; k++) { |
205 | | i = 0; |
206 | | for (; i <= k; i++) { |
207 | | c += (sword32)a[i] * b[k - i]; |
208 | | } |
209 | | t[k] = (word8)c; |
210 | | c >>= 8; |
211 | | } |
212 | | for (; k < 111; k++) { |
213 | | i = k - 55; |
214 | | for (; i < 56; i++) { |
215 | | c += (sword32)a[i] * b[k - i]; |
216 | | } |
217 | | t[k] = (word8)c; |
218 | | c >>= 8; |
219 | | } |
220 | | t[k] = (word8)c; |
221 | | |
222 | | for (i = 0; i < 28; i++) { |
223 | | o += t[i]; |
224 | | o += t[i + 56]; |
225 | | o += t[i + 84]; |
226 | | r[i] = (word8)o; |
227 | | o >>= 8; |
228 | | } |
229 | | for (i = 28; i < 56; i++) { |
230 | | o += t[i]; |
231 | | o += t[i + 56]; |
232 | | o += t[i + 28]; |
233 | | o += t[i + 56]; |
234 | | r[i] = (word8)o; |
235 | | o >>= 8; |
236 | | } |
237 | | for (i = 0; i < 56; i++) { |
238 | | if ((i == 0) || (i == 28)) cc += o; |
239 | | cc += r[i]; |
240 | | r[i] = (word8)cc; |
241 | | cc >>= 8; |
242 | | } |
243 | | } |
244 | | |
245 | | /* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1) |
246 | | * |
247 | | * r [in] Field element to hold result. |
248 | | * a [in] Field element to square. |
249 | | */ |
250 | | void fe448_sqr(word8* r, const word8* a) |
251 | | { |
252 | | int i, k; |
253 | | sword32 c = 0; |
254 | | sword32 p; |
255 | | sword16 o = 0, cc = 0; |
256 | | word8 t[112]; |
257 | | |
258 | | for (k = 0; k < 56; k++) { |
259 | | i = 0; |
260 | | for (; i <= k; i++) { |
261 | | if (k - i < i) |
262 | | break; |
263 | | p = (sword32)a[i] * a[k - i]; |
264 | | if (k - i != i) |
265 | | p *= 2; |
266 | | c += p; |
267 | | } |
268 | | t[k] = (word8)c; |
269 | | c >>= 8; |
270 | | } |
271 | | for (; k < 111; k++) { |
272 | | i = k - 55; |
273 | | for (; i < 56; i++) { |
274 | | if (k - i < i) |
275 | | break; |
276 | | p = (sword32)a[i] * a[k - i]; |
277 | | if (k - i != i) |
278 | | p *= 2; |
279 | | c += p; |
280 | | } |
281 | | t[k] = (word8)c; |
282 | | c >>= 8; |
283 | | } |
284 | | t[k] = (word8)c; |
285 | | |
286 | | for (i = 0; i < 28; i++) { |
287 | | o += t[i]; |
288 | | o += t[i + 56]; |
289 | | o += t[i + 84]; |
290 | | r[i] = (word8)o; |
291 | | o >>= 8; |
292 | | } |
293 | | for (i = 28; i < 56; i++) { |
294 | | o += t[i]; |
295 | | o += t[i + 56]; |
296 | | o += t[i + 28]; |
297 | | o += t[i + 56]; |
298 | | r[i] = (word8)o; |
299 | | o >>= 8; |
300 | | } |
301 | | for (i = 0; i < 56; i++) { |
302 | | if ((i == 0) || (i == 28)) cc += o; |
303 | | cc += r[i]; |
304 | | r[i] = (word8)cc; |
305 | | cc >>= 8; |
306 | | } |
307 | | fe448_norm(r); |
308 | | } |
309 | | |
310 | | /* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1 |
311 | | * Constant time implementation - using Fermat's little theorem: |
312 | | * a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a |
313 | | * For curve448: p - 2 = 2^448 - 2^224 - 3 |
314 | | * |
315 | | * r [in] Field element to hold result. |
316 | | * a [in] Field element to invert. |
317 | | */ |
318 | | void fe448_invert(word8* r, const word8* a) |
319 | | { |
320 | | int i; |
321 | | word8 t[56]; |
322 | | |
323 | | fe448_sqr(t, a); |
324 | | fe448_mul(t, t, a); |
325 | | for (i = 0; i < 221; i++) { |
326 | | fe448_sqr(t, t); |
327 | | fe448_mul(t, t, a); |
328 | | } |
329 | | fe448_sqr(t, t); |
330 | | for (i = 0; i < 222; i++) { |
331 | | fe448_sqr(t, t); |
332 | | fe448_mul(t, t, a); |
333 | | } |
334 | | fe448_sqr(t, t); |
335 | | fe448_sqr(t, t); |
336 | | fe448_mul(r, t, a); |
337 | | } |
338 | | |
339 | | /* Scalar multiply the point by a number. r = n.a |
340 | | * Uses Montgomery ladder and only requires the x-ordinate. |
341 | | * |
342 | | * r [in] Field element to hold result. |
343 | | * n [in] Scalar as an array of bytes. |
344 | | * a [in] Point to multiply - x-ordinate only. |
345 | | */ |
346 | | int curve448(byte* r, const byte* n, const byte* a) |
347 | | { |
348 | | word8 x1[56]; |
349 | | word8 x2[56] = {1}; |
350 | | word8 z2[56] = {0}; |
351 | | word8 x3[56]; |
352 | | word8 z3[56] = {1}; |
353 | | word8 t0[56]; |
354 | | word8 t1[56]; |
355 | | int i; |
356 | | unsigned int swap; |
357 | | |
358 | | fe448_copy(x1, a); |
359 | | fe448_copy(x3, a); |
360 | | |
361 | | swap = 0; |
362 | | for (i = 447; i >= 0; --i) { |
363 | | unsigned int b = (n[i >> 3] >> (i & 7)) & 1; |
364 | | swap ^= b; |
365 | | fe448_cswap(x2, x3, swap); |
366 | | fe448_cswap(z2, z3, swap); |
367 | | swap = b; |
368 | | |
369 | | /* Montgomery Ladder - double and add */ |
370 | | fe448_add(t0, x2, z2); |
371 | | fe448_add(t1, x3, z3); |
372 | | fe448_sub(x2, x2, z2); |
373 | | fe448_sub(x3, x3, z3); |
374 | | fe448_mul(t1, t1, x2); |
375 | | fe448_mul(z3, x3, t0); |
376 | | fe448_sqr(t0, t0); |
377 | | fe448_sqr(x2, x2); |
378 | | fe448_add(x3, z3, t1); |
379 | | fe448_sqr(x3, x3); |
380 | | fe448_sub(z3, z3, t1); |
381 | | fe448_sqr(z3, z3); |
382 | | fe448_mul(z3, z3, x1); |
383 | | fe448_sub(t1, t0, x2); |
384 | | fe448_mul(x2, t0, x2); |
385 | | fe448_mul39081(z2, t1); |
386 | | fe448_add(z2, t0, z2); |
387 | | fe448_mul(z2, z2, t1); |
388 | | } |
389 | | fe448_cswap(x2, x3, swap); |
390 | | fe448_cswap(z2, z3, swap); |
391 | | |
392 | | fe448_invert(z2, z2); |
393 | | fe448_mul(r, x2, z2); |
394 | | fe448_norm(r); |
395 | | |
396 | | return 0; |
397 | | } |
398 | | |
399 | | #ifdef HAVE_ED448 |
400 | | /* Check whether field element is not 0. |
401 | | * Field element must have been normalized before call. |
402 | | * |
403 | | * a [in] Field element. |
404 | | * returns 0 when zero, and any other value otherwise. |
405 | | */ |
406 | | int fe448_isnonzero(const word8* a) |
407 | | { |
408 | | int i; |
409 | | byte c = 0; |
410 | | for (i = 0; i < 56; i++) |
411 | | c |= a[i]; |
412 | | return c; |
413 | | } |
414 | | |
415 | | /* Negates the field element. r = -a mod (2^448 - 2^224 - 1) |
416 | | * Add 0x200 to each element and subtract 2 from next. |
417 | | * Top element overflow handled by subtracting 2 from index 0 and 28. |
418 | | * |
419 | | * r [in] Field element to hold result. |
420 | | * a [in] Field element. |
421 | | */ |
422 | | void fe448_neg(word8* r, const word8* a) |
423 | | { |
424 | | int i; |
425 | | sword16 c = 0; |
426 | | sword16 o = 0; |
427 | | |
428 | | for (i = 0; i < 56; i++) { |
429 | | if (i == 28) |
430 | | c += 0x1fc; |
431 | | else |
432 | | c += 0x1fe; |
433 | | c -= a[i]; |
434 | | r[i] = (word8)c; |
435 | | c >>= 8; |
436 | | } |
437 | | |
438 | | for (i = 0; i < 56; i++) { |
439 | | if ((i == 0) || (i == 28)) o += c; |
440 | | o += r[i]; |
441 | | r[i] = (word8)o; |
442 | | o >>= 8; |
443 | | } |
444 | | } |
445 | | |
446 | | /* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1 |
447 | | * Used for calculating y-ordinate from x-ordinate for Ed448. |
448 | | * |
449 | | * r [in] Field element to hold result. |
450 | | * a [in] Field element to exponentiate. |
451 | | */ |
452 | | void fe448_pow_2_446_222_1(word8* r, const word8* a) |
453 | | { |
454 | | int i; |
455 | | word8 t[56]; |
456 | | |
457 | | fe448_sqr(t, a); |
458 | | fe448_mul(t, t, a); |
459 | | for (i = 0; i < 221; i++) { |
460 | | fe448_sqr(t, t); |
461 | | fe448_mul(t, t, a); |
462 | | } |
463 | | fe448_sqr(t, t); |
464 | | for (i = 0; i < 221; i++) { |
465 | | fe448_sqr(t, t); |
466 | | fe448_mul(t, t, a); |
467 | | } |
468 | | fe448_sqr(t, t); |
469 | | fe448_mul(r, t, a); |
470 | | } |
471 | | |
472 | | /* Constant time, conditional move of b into a. |
473 | | * a is not changed if the condition is 0. |
474 | | * |
475 | | * a A field element. |
476 | | * b A field element. |
477 | | * c If 1 then copy and if 0 then don't copy. |
478 | | */ |
479 | | void fe448_cmov(word8* a, const word8* b, int c) |
480 | | { |
481 | | int i; |
482 | | word8 m = -(word8)c; |
483 | | word8 t[56]; |
484 | | |
485 | | for (i = 0; i < 56; i++) |
486 | | t[i] = m & (a[i] ^ b[i]); |
487 | | for (i = 0; i < 56; i++) |
488 | | a[i] ^= t[i]; |
489 | | } |
490 | | |
491 | | #endif /* HAVE_ED448 */ |
492 | | #elif defined(CURVED448_128BIT) |
493 | | |
494 | | /* Initialize the field element operations. |
495 | | */ |
496 | | void fe448_init(void) |
497 | 7.41k | { |
498 | 7.41k | } |
499 | | |
500 | | /* Convert the field element from a byte array to an array of 56-bits. |
501 | | * |
502 | | * r [in] Array to encode into. |
503 | | * b [in] Byte array. |
504 | | */ |
505 | | void fe448_from_bytes(sword64* r, const unsigned char* b) |
506 | 6.32k | { |
507 | 6.32k | r[ 0] = (sword64)( |
508 | 6.32k | (((word64)b[ 0]) << 0) |
509 | 6.32k | | (((word64)b[ 1]) << 8) |
510 | 6.32k | | (((word64)b[ 2]) << 16) |
511 | 6.32k | | (((word64)b[ 3]) << 24) |
512 | 6.32k | | (((word64)b[ 4]) << 32) |
513 | 6.32k | | (((word64)b[ 5]) << 40) |
514 | 6.32k | | (((word64)b[ 6]) << 48)); |
515 | 6.32k | r[ 1] = (sword64)( |
516 | 6.32k | (((word64)b[ 7]) << 0) |
517 | 6.32k | | (((word64)b[ 8]) << 8) |
518 | 6.32k | | (((word64)b[ 9]) << 16) |
519 | 6.32k | | (((word64)b[10]) << 24) |
520 | 6.32k | | (((word64)b[11]) << 32) |
521 | 6.32k | | (((word64)b[12]) << 40) |
522 | 6.32k | | (((word64)b[13]) << 48)); |
523 | 6.32k | r[ 2] = (sword64)( |
524 | 6.32k | (((word64)b[14]) << 0) |
525 | 6.32k | | (((word64)b[15]) << 8) |
526 | 6.32k | | (((word64)b[16]) << 16) |
527 | 6.32k | | (((word64)b[17]) << 24) |
528 | 6.32k | | (((word64)b[18]) << 32) |
529 | 6.32k | | (((word64)b[19]) << 40) |
530 | 6.32k | | (((word64)b[20]) << 48)); |
531 | 6.32k | r[ 3] = (sword64)( |
532 | 6.32k | (((word64)b[21]) << 0) |
533 | 6.32k | | (((word64)b[22]) << 8) |
534 | 6.32k | | (((word64)b[23]) << 16) |
535 | 6.32k | | (((word64)b[24]) << 24) |
536 | 6.32k | | (((word64)b[25]) << 32) |
537 | 6.32k | | (((word64)b[26]) << 40) |
538 | 6.32k | | (((word64)b[27]) << 48)); |
539 | 6.32k | r[ 4] = (sword64)( |
540 | 6.32k | (((word64)b[28]) << 0) |
541 | 6.32k | | (((word64)b[29]) << 8) |
542 | 6.32k | | (((word64)b[30]) << 16) |
543 | 6.32k | | (((word64)b[31]) << 24) |
544 | 6.32k | | (((word64)b[32]) << 32) |
545 | 6.32k | | (((word64)b[33]) << 40) |
546 | 6.32k | | (((word64)b[34]) << 48)); |
547 | 6.32k | r[ 5] = (sword64)( |
548 | 6.32k | (((word64)b[35]) << 0) |
549 | 6.32k | | (((word64)b[36]) << 8) |
550 | 6.32k | | (((word64)b[37]) << 16) |
551 | 6.32k | | (((word64)b[38]) << 24) |
552 | 6.32k | | (((word64)b[39]) << 32) |
553 | 6.32k | | (((word64)b[40]) << 40) |
554 | 6.32k | | (((word64)b[41]) << 48)); |
555 | 6.32k | r[ 6] = (sword64)( |
556 | 6.32k | (((word64)b[42]) << 0) |
557 | 6.32k | | (((word64)b[43]) << 8) |
558 | 6.32k | | (((word64)b[44]) << 16) |
559 | 6.32k | | (((word64)b[45]) << 24) |
560 | 6.32k | | (((word64)b[46]) << 32) |
561 | 6.32k | | (((word64)b[47]) << 40) |
562 | 6.32k | | (((word64)b[48]) << 48)); |
563 | 6.32k | r[ 7] = (sword64)( |
564 | 6.32k | (((word64)b[49]) << 0) |
565 | 6.32k | | (((word64)b[50]) << 8) |
566 | 6.32k | | (((word64)b[51]) << 16) |
567 | 6.32k | | (((word64)b[52]) << 24) |
568 | 6.32k | | (((word64)b[53]) << 32) |
569 | 6.32k | | (((word64)b[54]) << 40) |
570 | 6.32k | | (((word64)b[55]) << 48)); |
571 | 6.32k | } |
572 | | |
573 | | /* Convert the field element to a byte array from an array of 56-bits. |
574 | | * |
575 | | * b [in] Byte array. |
576 | | * a [in] Array to encode into. |
577 | | */ |
578 | | void fe448_to_bytes(unsigned char* b, const sword64* a) |
579 | 24.5k | { |
580 | 24.5k | sword64 t; |
581 | | /* Mod */ |
582 | 24.5k | sword64 in0 = a[0]; |
583 | 24.5k | sword64 in1 = a[1]; |
584 | 24.5k | sword64 in2 = a[2]; |
585 | 24.5k | sword64 in3 = a[3]; |
586 | 24.5k | sword64 in4 = a[4]; |
587 | 24.5k | sword64 in5 = a[5]; |
588 | 24.5k | sword64 in6 = a[6]; |
589 | 24.5k | sword64 in7 = a[7]; |
590 | 24.5k | sword64 o = in7 >> 56; |
591 | 24.5k | in7 -= (sword64)((word64)o << 56); |
592 | 24.5k | in0 += o; |
593 | 24.5k | in4 += o; |
594 | 24.5k | o = (in0 + 1) >> 56; |
595 | 24.5k | o = (o + in1) >> 56; |
596 | 24.5k | o = (o + in2) >> 56; |
597 | 24.5k | o = (o + in3) >> 56; |
598 | 24.5k | o = (o + in4 + 1) >> 56; |
599 | 24.5k | o = (o + in5) >> 56; |
600 | 24.5k | o = (o + in6) >> 56; |
601 | 24.5k | o = (o + in7) >> 56; |
602 | 24.5k | in0 += o; |
603 | 24.5k | in4 += o; |
604 | 24.5k | in7 -= (sword64)((word64)o << 56); |
605 | 24.5k | o = (sword64)(in0 >> 56); in1 += o; |
606 | 24.5k | t = (sword64)((word64)o << 56); in0 -= t; |
607 | 24.5k | o = (sword64)(in1 >> 56); in2 += o; |
608 | 24.5k | t = (sword64)((word64)o << 56); in1 -= t; |
609 | 24.5k | o = (sword64)(in2 >> 56); in3 += o; |
610 | 24.5k | t = (sword64)((word64)o << 56); in2 -= t; |
611 | 24.5k | o = (sword64)(in3 >> 56); in4 += o; |
612 | 24.5k | t = (sword64)((word64)o << 56); in3 -= t; |
613 | 24.5k | o = (sword64)(in4 >> 56); in5 += o; |
614 | 24.5k | t = (sword64)((word64)o << 56); in4 -= t; |
615 | 24.5k | o = (sword64)(in5 >> 56); in6 += o; |
616 | 24.5k | t = (sword64)((word64)o << 56); in5 -= t; |
617 | 24.5k | o = (sword64)(in6 >> 56); in7 += o; |
618 | 24.5k | t = (sword64)((word64)o << 56); in6 -= t; |
619 | 24.5k | o = (sword64)(in7 >> 56); in0 += o; |
620 | 24.5k | in4 += o; |
621 | 24.5k | t = (sword64)((word64)o << 56); in7 -= t; |
622 | | |
623 | | /* Output as bytes */ |
624 | 24.5k | b[ 0] = (byte)(in0 >> 0); |
625 | 24.5k | b[ 1] = (byte)(in0 >> 8); |
626 | 24.5k | b[ 2] = (byte)(in0 >> 16); |
627 | 24.5k | b[ 3] = (byte)(in0 >> 24); |
628 | 24.5k | b[ 4] = (byte)(in0 >> 32); |
629 | 24.5k | b[ 5] = (byte)(in0 >> 40); |
630 | 24.5k | b[ 6] = (byte)(in0 >> 48); |
631 | 24.5k | b[ 7] = (byte)(in1 >> 0); |
632 | 24.5k | b[ 8] = (byte)(in1 >> 8); |
633 | 24.5k | b[ 9] = (byte)(in1 >> 16); |
634 | 24.5k | b[10] = (byte)(in1 >> 24); |
635 | 24.5k | b[11] = (byte)(in1 >> 32); |
636 | 24.5k | b[12] = (byte)(in1 >> 40); |
637 | 24.5k | b[13] = (byte)(in1 >> 48); |
638 | 24.5k | b[14] = (byte)(in2 >> 0); |
639 | 24.5k | b[15] = (byte)(in2 >> 8); |
640 | 24.5k | b[16] = (byte)(in2 >> 16); |
641 | 24.5k | b[17] = (byte)(in2 >> 24); |
642 | 24.5k | b[18] = (byte)(in2 >> 32); |
643 | 24.5k | b[19] = (byte)(in2 >> 40); |
644 | 24.5k | b[20] = (byte)(in2 >> 48); |
645 | 24.5k | b[21] = (byte)(in3 >> 0); |
646 | 24.5k | b[22] = (byte)(in3 >> 8); |
647 | 24.5k | b[23] = (byte)(in3 >> 16); |
648 | 24.5k | b[24] = (byte)(in3 >> 24); |
649 | 24.5k | b[25] = (byte)(in3 >> 32); |
650 | 24.5k | b[26] = (byte)(in3 >> 40); |
651 | 24.5k | b[27] = (byte)(in3 >> 48); |
652 | 24.5k | b[28] = (byte)(in4 >> 0); |
653 | 24.5k | b[29] = (byte)(in4 >> 8); |
654 | 24.5k | b[30] = (byte)(in4 >> 16); |
655 | 24.5k | b[31] = (byte)(in4 >> 24); |
656 | 24.5k | b[32] = (byte)(in4 >> 32); |
657 | 24.5k | b[33] = (byte)(in4 >> 40); |
658 | 24.5k | b[34] = (byte)(in4 >> 48); |
659 | 24.5k | b[35] = (byte)(in5 >> 0); |
660 | 24.5k | b[36] = (byte)(in5 >> 8); |
661 | 24.5k | b[37] = (byte)(in5 >> 16); |
662 | 24.5k | b[38] = (byte)(in5 >> 24); |
663 | 24.5k | b[39] = (byte)(in5 >> 32); |
664 | 24.5k | b[40] = (byte)(in5 >> 40); |
665 | 24.5k | b[41] = (byte)(in5 >> 48); |
666 | 24.5k | b[42] = (byte)(in6 >> 0); |
667 | 24.5k | b[43] = (byte)(in6 >> 8); |
668 | 24.5k | b[44] = (byte)(in6 >> 16); |
669 | 24.5k | b[45] = (byte)(in6 >> 24); |
670 | 24.5k | b[46] = (byte)(in6 >> 32); |
671 | 24.5k | b[47] = (byte)(in6 >> 40); |
672 | 24.5k | b[48] = (byte)(in6 >> 48); |
673 | 24.5k | b[49] = (byte)(in7 >> 0); |
674 | 24.5k | b[50] = (byte)(in7 >> 8); |
675 | 24.5k | b[51] = (byte)(in7 >> 16); |
676 | 24.5k | b[52] = (byte)(in7 >> 24); |
677 | 24.5k | b[53] = (byte)(in7 >> 32); |
678 | 24.5k | b[54] = (byte)(in7 >> 40); |
679 | 24.5k | b[55] = (byte)(in7 >> 48); |
680 | 24.5k | } |
681 | | |
682 | | /* Set the field element to 0. |
683 | | * |
684 | | * a [in] Field element. |
685 | | */ |
686 | | void fe448_1(sword64* a) |
687 | 583k | { |
688 | 583k | a[0] = 1; |
689 | 583k | a[1] = 0; |
690 | 583k | a[2] = 0; |
691 | 583k | a[3] = 0; |
692 | 583k | a[4] = 0; |
693 | 583k | a[5] = 0; |
694 | 583k | a[6] = 0; |
695 | 583k | a[7] = 0; |
696 | 583k | } |
697 | | |
698 | | /* Set the field element to 0. |
699 | | * |
700 | | * a [in] Field element. |
701 | | */ |
702 | | void fe448_0(sword64* a) |
703 | 570k | { |
704 | 570k | a[0] = 0; |
705 | 570k | a[1] = 0; |
706 | 570k | a[2] = 0; |
707 | 570k | a[3] = 0; |
708 | 570k | a[4] = 0; |
709 | 570k | a[5] = 0; |
710 | 570k | a[6] = 0; |
711 | 570k | a[7] = 0; |
712 | 570k | } |
713 | | |
714 | | /* Copy one field element into another: d = a. |
715 | | * |
716 | | * d [in] Destination field element. |
717 | | * a [in] Source field element. |
718 | | */ |
719 | | void fe448_copy(sword64* d, const sword64* a) |
720 | 16.8k | { |
721 | 16.8k | d[0] = a[0]; |
722 | 16.8k | d[1] = a[1]; |
723 | 16.8k | d[2] = a[2]; |
724 | 16.8k | d[3] = a[3]; |
725 | 16.8k | d[4] = a[4]; |
726 | 16.8k | d[5] = a[5]; |
727 | 16.8k | d[6] = a[6]; |
728 | 16.8k | d[7] = a[7]; |
729 | 16.8k | } |
730 | | |
731 | | /* Conditionally swap the elements. |
732 | | * Constant time implementation. |
733 | | * |
734 | | * a [in] First field element. |
735 | | * b [in] Second field element. |
736 | | * c [in] Swap when 1. Valid values: 0, 1. |
737 | | */ |
738 | | static void fe448_cswap(sword64* a, sword64* b, int c) |
739 | 1.43M | { |
740 | 1.43M | sword64 mask = -(sword64)c; |
741 | 1.43M | sword64 t0 = (a[0] ^ b[0]) & mask; |
742 | 1.43M | sword64 t1 = (a[1] ^ b[1]) & mask; |
743 | 1.43M | sword64 t2 = (a[2] ^ b[2]) & mask; |
744 | 1.43M | sword64 t3 = (a[3] ^ b[3]) & mask; |
745 | 1.43M | sword64 t4 = (a[4] ^ b[4]) & mask; |
746 | 1.43M | sword64 t5 = (a[5] ^ b[5]) & mask; |
747 | 1.43M | sword64 t6 = (a[6] ^ b[6]) & mask; |
748 | 1.43M | sword64 t7 = (a[7] ^ b[7]) & mask; |
749 | 1.43M | a[0] ^= t0; |
750 | 1.43M | a[1] ^= t1; |
751 | 1.43M | a[2] ^= t2; |
752 | 1.43M | a[3] ^= t3; |
753 | 1.43M | a[4] ^= t4; |
754 | 1.43M | a[5] ^= t5; |
755 | 1.43M | a[6] ^= t6; |
756 | 1.43M | a[7] ^= t7; |
757 | 1.43M | b[0] ^= t0; |
758 | 1.43M | b[1] ^= t1; |
759 | 1.43M | b[2] ^= t2; |
760 | 1.43M | b[3] ^= t3; |
761 | 1.43M | b[4] ^= t4; |
762 | 1.43M | b[5] ^= t5; |
763 | 1.43M | b[6] ^= t6; |
764 | 1.43M | b[7] ^= t7; |
765 | 1.43M | } |
766 | | |
767 | | /* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1) |
768 | | * |
769 | | * r [in] Field element to hold sum. |
770 | | * a [in] Field element to add. |
771 | | * b [in] Field element to add. |
772 | | */ |
773 | | void fe448_add(sword64* r, const sword64* a, const sword64* b) |
774 | 7.74M | { |
775 | 7.74M | r[0] = a[0] + b[0]; |
776 | 7.74M | r[1] = a[1] + b[1]; |
777 | 7.74M | r[2] = a[2] + b[2]; |
778 | 7.74M | r[3] = a[3] + b[3]; |
779 | 7.74M | r[4] = a[4] + b[4]; |
780 | 7.74M | r[5] = a[5] + b[5]; |
781 | 7.74M | r[6] = a[6] + b[6]; |
782 | 7.74M | r[7] = a[7] + b[7]; |
783 | 7.74M | } |
784 | | |
785 | | /* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1) |
786 | | * |
787 | | * r [in] Field element to hold difference. |
788 | | * a [in] Field element to subtract from. |
789 | | * b [in] Field element to subtract. |
790 | | */ |
791 | | void fe448_sub(sword64* r, const sword64* a, const sword64* b) |
792 | 8.33M | { |
793 | 8.33M | r[0] = a[0] - b[0]; |
794 | 8.33M | r[1] = a[1] - b[1]; |
795 | 8.33M | r[2] = a[2] - b[2]; |
796 | 8.33M | r[3] = a[3] - b[3]; |
797 | 8.33M | r[4] = a[4] - b[4]; |
798 | 8.33M | r[5] = a[5] - b[5]; |
799 | 8.33M | r[6] = a[6] - b[6]; |
800 | 8.33M | r[7] = a[7] - b[7]; |
801 | 8.33M | } |
802 | | |
803 | | /* Multiply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1) |
804 | | * |
805 | | * r [in] Field element to hold result. |
806 | | * a [in] Field element to multiply. |
807 | | */ |
808 | | void fe448_mul39081(sword64* r, const sword64* a) |
809 | 1.51M | { |
810 | 1.51M | sword128 t; |
811 | 1.51M | sword64 o; |
812 | 1.51M | sword128 t0 = a[0] * (sword128)39081; |
813 | 1.51M | sword128 t1 = a[1] * (sword128)39081; |
814 | 1.51M | sword128 t2 = a[2] * (sword128)39081; |
815 | 1.51M | sword128 t3 = a[3] * (sword128)39081; |
816 | 1.51M | sword128 t4 = a[4] * (sword128)39081; |
817 | 1.51M | sword128 t5 = a[5] * (sword128)39081; |
818 | 1.51M | sword128 t6 = a[6] * (sword128)39081; |
819 | 1.51M | sword128 t7 = a[7] * (sword128)39081; |
820 | 1.51M | o = (sword64)(t0 >> 56); t1 += o; |
821 | 1.51M | t = (sword128)((word128)o << 56); t0 -= t; |
822 | 1.51M | o = (sword64)(t1 >> 56); t2 += o; |
823 | 1.51M | t = (sword128)((word128)o << 56); t1 -= t; |
824 | 1.51M | o = (sword64)(t2 >> 56); t3 += o; |
825 | 1.51M | t = (sword128)((word128)o << 56); t2 -= t; |
826 | 1.51M | o = (sword64)(t3 >> 56); t4 += o; |
827 | 1.51M | t = (sword128)((word128)o << 56); t3 -= t; |
828 | 1.51M | o = (sword64)(t4 >> 56); t5 += o; |
829 | 1.51M | t = (sword128)((word128)o << 56); t4 -= t; |
830 | 1.51M | o = (sword64)(t5 >> 56); t6 += o; |
831 | 1.51M | t = (sword128)((word128)o << 56); t5 -= t; |
832 | 1.51M | o = (sword64)(t6 >> 56); t7 += o; |
833 | 1.51M | t = (sword128)((word128)o << 56); t6 -= t; |
834 | 1.51M | o = (sword64)(t7 >> 56); t0 += o; |
835 | 1.51M | t4 += o; |
836 | 1.51M | t = (sword128)((word128)o << 56); t7 -= t; |
837 | | |
838 | | /* Store */ |
839 | 1.51M | r[0] = (sword64)t0; |
840 | 1.51M | r[1] = (sword64)t1; |
841 | 1.51M | r[2] = (sword64)t2; |
842 | 1.51M | r[3] = (sword64)t3; |
843 | 1.51M | r[4] = (sword64)t4; |
844 | 1.51M | r[5] = (sword64)t5; |
845 | 1.51M | r[6] = (sword64)t6; |
846 | 1.51M | r[7] = (sword64)t7; |
847 | 1.51M | } |
848 | | |
849 | | /* Multiply two field elements. r = (a * b) mod (2^448 - 2^224 - 1) |
850 | | * |
851 | | * r [in] Field element to hold result. |
852 | | * a [in] Field element to multiply. |
853 | | * b [in] Field element to multiply. |
854 | | */ |
855 | | void fe448_mul(sword64* r, const sword64* a, const sword64* b) |
856 | 13.4M | { |
857 | 13.4M | sword64 o; |
858 | 13.4M | sword64 a1[4]; |
859 | 13.4M | sword64 b1[4]; |
860 | 13.4M | sword128 t0; |
861 | 13.4M | sword128 t1; |
862 | 13.4M | sword128 t2; |
863 | 13.4M | sword128 t3; |
864 | 13.4M | sword128 t4; |
865 | 13.4M | sword128 t5; |
866 | 13.4M | sword128 t6; |
867 | 13.4M | sword128 t7; |
868 | 13.4M | sword128 t03; |
869 | 13.4M | sword128 t04; |
870 | 13.4M | sword128 t13; |
871 | 13.4M | sword128 t14; |
872 | | |
873 | 13.4M | a1[0] = a[0] + a[4]; |
874 | 13.4M | a1[1] = a[1] + a[5]; |
875 | 13.4M | a1[2] = a[2] + a[6]; |
876 | 13.4M | a1[3] = a[3] + a[7]; |
877 | 13.4M | b1[0] = b[0] + b[4]; |
878 | 13.4M | b1[1] = b[1] + b[5]; |
879 | 13.4M | b1[2] = b[2] + b[6]; |
880 | 13.4M | b1[3] = b[3] + b[7]; |
881 | | |
882 | 13.4M | t03 = ((sword128)a[0] * b[3]) + ((sword128)a[1] * b[2]) |
883 | 13.4M | + ((sword128)a[2] * b[1]) + ((sword128)a[3] * b[0]); |
884 | 13.4M | t04 = ((sword128)a[1] * b[3]) + ((sword128)a[2] * b[2]) |
885 | 13.4M | + ((sword128)a[3] * b[1]); |
886 | 13.4M | t04 += t03 >> 56; |
887 | 13.4M | t03 &= 0xffffffffffffffL; |
888 | 13.4M | t13 = ((sword128)a1[0] * b1[3]) + ((sword128)a1[1] * b1[2]) |
889 | 13.4M | + ((sword128)a1[2] * b1[1]) + ((sword128)a1[3] * b1[0]); |
890 | 13.4M | t14 = ((sword128)a1[1] * b1[3]) + ((sword128)a1[2] * b1[2]) |
891 | 13.4M | + ((sword128)a1[3] * b1[1]); |
892 | 13.4M | t14 += t13 >> 56; |
893 | 13.4M | t13 &= 0xffffffffffffffL; |
894 | | |
895 | 13.4M | t0 = ((sword128)a[0] * b[0]) + ((sword128)a[4] * b[4]) + t14 + -t04; |
896 | 13.4M | t1 = ((sword128)a[0] * b[1]) + ((sword128)a[1] * b[0]) |
897 | 13.4M | + ((sword128)a[4] * b[5]) + ((sword128)a[5] * b[4]) |
898 | 13.4M | + ((sword128)a1[2] * b1[3]) + ((sword128)a1[3] * b1[2]) |
899 | 13.4M | - ((sword128)a[2] * b[3]) - ((sword128)a[3] * b[2]); |
900 | 13.4M | o = (sword64)(t0 >> 56); t1 += o; t0 &= 0xffffffffffffffL; |
901 | 13.4M | t2 = ((sword128)a[0] * b[2]) + ((sword128)a[1] * b[1]) |
902 | 13.4M | + ((sword128)a[2] * b[0]) + ((sword128)a[4] * b[6]) |
903 | 13.4M | + ((sword128)a[5] * b[5]) + ((sword128)a[6] * b[4]) |
904 | 13.4M | + ((sword128)a1[3] * b1[3]) - ((sword128)a[3] * b[3]); |
905 | 13.4M | o = (sword64)(t1 >> 56); t2 += o; t1 &= 0xffffffffffffffL; |
906 | 13.4M | t3 = t03 + ((sword128)a[4] * b[7]) + ((sword128)a[5] * b[6]) |
907 | 13.4M | + ((sword128)a[6] * b[5]) + ((sword128)a[7] * b[4]); |
908 | 13.4M | o = (sword64)(t2 >> 56); t3 += o; t2 &= 0xffffffffffffffL; |
909 | 13.4M | t4 = ((sword128)a[5] * b[7]) + ((sword128)a[6] * b[6]) |
910 | 13.4M | + ((sword128)a[7] * b[5]) + ((sword128)a1[0] * b1[0]) |
911 | 13.4M | - ((sword128)a[0] * b[0]) + t14; |
912 | 13.4M | o = (sword64)(t3 >> 56); t4 += o; t3 &= 0xffffffffffffffL; |
913 | 13.4M | t5 = ((sword128)a[6] * b[7]) + ((sword128)a[7] * b[6]) |
914 | 13.4M | + ((sword128)a1[0] * b1[1]) + ((sword128)a1[1] * b1[0]) |
915 | 13.4M | - ((sword128)a[0] * b[1]) - ((sword128)a[1] * b[0]) |
916 | 13.4M | + ((sword128)a1[2] * b1[3]) + ((sword128)a1[3] * b1[2]); |
917 | 13.4M | o = (sword64)(t4 >> 56); t5 += o; t4 &= 0xffffffffffffffL; |
918 | 13.4M | t6 = ((sword128)a[7] * b[7]) + ((sword128)a1[0] * b1[2]) |
919 | 13.4M | + ((sword128)a1[1] * b1[1]) + ((sword128)a1[2] * b1[0]) |
920 | 13.4M | - ((sword128)a[0] * b[2]) - ((sword128)a[1] * b[1]) |
921 | 13.4M | - ((sword128)a[2] * b[0]) + ((sword128)a1[3] * b1[3]); |
922 | 13.4M | o = (sword64)(t5 >> 56); t6 += o; t5 &= 0xffffffffffffffL; |
923 | 13.4M | t7 = t13 + -t03; |
924 | 13.4M | o = (sword64)(t6 >> 56); t7 += o; t6 &= 0xffffffffffffffL; |
925 | 13.4M | o = (sword64)(t7 >> 56); t0 += o; |
926 | 13.4M | t4 += o; t7 &= 0xffffffffffffffL; |
927 | | |
928 | | /* Store */ |
929 | 13.4M | r[0] = (sword64)t0; |
930 | 13.4M | r[1] = (sword64)t1; |
931 | 13.4M | r[2] = (sword64)t2; |
932 | 13.4M | r[3] = (sword64)t3; |
933 | 13.4M | r[4] = (sword64)t4; |
934 | 13.4M | r[5] = (sword64)t5; |
935 | 13.4M | r[6] = (sword64)t6; |
936 | 13.4M | r[7] = (sword64)t7; |
937 | 13.4M | } |
938 | | |
939 | | /* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1) |
940 | | * |
941 | | * r [in] Field element to hold result. |
942 | | * a [in] Field element to square. |
943 | | */ |
944 | | void fe448_sqr(sword64* r, const sword64* a) |
945 | 12.8M | { |
946 | 12.8M | sword64 o; |
947 | 12.8M | sword64 a1[4]; |
948 | 12.8M | sword128 t0; |
949 | 12.8M | sword128 t1; |
950 | 12.8M | sword128 t2; |
951 | 12.8M | sword128 t3; |
952 | 12.8M | sword128 t4; |
953 | 12.8M | sword128 t5; |
954 | 12.8M | sword128 t6; |
955 | 12.8M | sword128 t7; |
956 | 12.8M | sword128 t03; |
957 | 12.8M | sword128 t04; |
958 | 12.8M | sword128 t13; |
959 | 12.8M | sword128 t14; |
960 | | |
961 | 12.8M | a1[0] = a[0] + a[4]; |
962 | 12.8M | a1[1] = a[1] + a[5]; |
963 | 12.8M | a1[2] = a[2] + a[6]; |
964 | 12.8M | a1[3] = a[3] + a[7]; |
965 | | |
966 | 12.8M | t03 = ((sword128)a[0] * (2 * a[3])) + ((sword128)a[1] * (2 * a[2])); |
967 | 12.8M | t04 = ((sword128)a[1] * (2 * a[3])) + ((sword128)a[2] * a[2]); |
968 | 12.8M | t04 += t03 >> 56; |
969 | 12.8M | t03 &= 0xffffffffffffffL; |
970 | 12.8M | t13 = ((sword128)a1[0] * (2 * a1[3])) + ((sword128)a1[1] * (2 * a1[2])); |
971 | 12.8M | t14 = ((sword128)a1[1] * (2 * a1[3])) + ((sword128)a1[2] * a1[2]); |
972 | 12.8M | t14 += t13 >> 56; |
973 | 12.8M | t13 &= 0xffffffffffffffL; |
974 | | |
975 | 12.8M | t0 = ((sword128)a[0] * a[0]) + ((sword128)a[4] * a[4]) + t14 + -t04; |
976 | 12.8M | t1 = ((sword128)a[0] * (2 * a[1])) + ((sword128)a[4] * (2 * a[5])) |
977 | 12.8M | + ((sword128)a1[2] * (2 * a1[3])) - ((sword128)a[2] * (2 * a[3])); |
978 | 12.8M | o = (sword64)(t0 >> 56); t1 += o; t0 &= 0xffffffffffffffL; |
979 | 12.8M | t2 = ((sword128)a[0] * (2 * a[2])) + ((sword128)a[1] * a[1]) |
980 | 12.8M | + ((sword128)a[4] * (2 * a[6])) + ((sword128)a[5] * a[5]) |
981 | 12.8M | + ((sword128)a1[3] * a1[3]) - ((sword128)a[3] * a[3]); |
982 | 12.8M | o = (sword64)(t1 >> 56); t2 += o; t1 &= 0xffffffffffffffL; |
983 | 12.8M | t3 = t03 + ((sword128)a[4] * (2 * a[7])) + ((sword128)a[5] * (2 * a[6])); |
984 | 12.8M | o = (sword64)(t2 >> 56); t3 += o; t2 &= 0xffffffffffffffL; |
985 | 12.8M | t4 = ((sword128)a[5] * (2 * a[7])) + ((sword128)a[6] * a[6]) |
986 | 12.8M | + ((sword128)a1[0] * a1[0]) - ((sword128)a[0] * a[0]) + t14; |
987 | 12.8M | o = (sword64)(t3 >> 56); t4 += o; t3 &= 0xffffffffffffffL; |
988 | 12.8M | t5 = ((sword128)a[6] * (2 * a[7])) + ((sword128)a1[0] * (2 * a1[1])) |
989 | 12.8M | - ((sword128)a[0] * (2 * a[1])) + ((sword128)a1[2] * (2 * a1[3])); |
990 | 12.8M | o = (sword64)(t4 >> 56); t5 += o; t4 &= 0xffffffffffffffL; |
991 | 12.8M | t6 = ((sword128)a[7] * a[7]) + ((sword128)a1[0] * (2 * a1[2])) |
992 | 12.8M | + ((sword128)a1[1] * a1[1]) - ((sword128)a[0] * (2 * a[2])) |
993 | 12.8M | - ((sword128)a[1] * a[1]) + ((sword128)a1[3] * a1[3]); |
994 | 12.8M | o = (sword64)(t5 >> 56); t6 += o; t5 &= 0xffffffffffffffL; |
995 | 12.8M | t7 = t13 + -t03; |
996 | 12.8M | o = (sword64)(t6 >> 56); t7 += o; t6 &= 0xffffffffffffffL; |
997 | 12.8M | o = (sword64)(t7 >> 56); t0 += o; |
998 | 12.8M | t4 += o; t7 &= 0xffffffffffffffL; |
999 | | |
1000 | | /* Store */ |
1001 | 12.8M | r[0] = (sword64)t0; |
1002 | 12.8M | r[1] = (sword64)t1; |
1003 | 12.8M | r[2] = (sword64)t2; |
1004 | 12.8M | r[3] = (sword64)t3; |
1005 | 12.8M | r[4] = (sword64)t4; |
1006 | 12.8M | r[5] = (sword64)t5; |
1007 | 12.8M | r[6] = (sword64)t6; |
1008 | 12.8M | r[7] = (sword64)t7; |
1009 | 12.8M | } |
1010 | | |
1011 | | /* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1 |
1012 | | * Constant time implementation - using Fermat's little theorem: |
1013 | | * a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a |
1014 | | * For curve448: p - 2 = 2^448 - 2^224 - 3 |
1015 | | * |
1016 | | * r [in] Field element to hold result. |
1017 | | * a [in] Field element to invert. |
1018 | | */ |
1019 | | void fe448_invert(sword64* r, const sword64* a) |
1020 | 8.37k | { |
1021 | 8.37k | sword64 t1[8]; |
1022 | 8.37k | sword64 t2[8]; |
1023 | 8.37k | sword64 t3[8]; |
1024 | 8.37k | sword64 t4[8]; |
1025 | 8.37k | int i; |
1026 | | |
1027 | 8.37k | fe448_sqr(t1, a); |
1028 | | /* t1 = 2 */ |
1029 | 8.37k | fe448_mul(t1, t1, a); |
1030 | | /* t1 = 3 */ |
1031 | 16.7k | fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2); |
1032 | | /* t2 = c */ |
1033 | 8.37k | fe448_mul(t3, t2, a); |
1034 | | /* t3 = d */ |
1035 | 8.37k | fe448_mul(t1, t2, t1); |
1036 | | /* t1 = f */ |
1037 | 8.37k | fe448_sqr(t2, t1); |
1038 | | /* t2 = 1e */ |
1039 | 8.37k | fe448_mul(t4, t2, a); |
1040 | | /* t4 = 1f */ |
1041 | 41.8k | fe448_sqr(t2, t4); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2); |
1042 | | /* t2 = 3e0 */ |
1043 | 8.37k | fe448_mul(t1, t2, t4); |
1044 | | /* t1 = 3ff */ |
1045 | 83.7k | fe448_sqr(t2, t1); for (i = 1; i < 10; ++i) fe448_sqr(t2, t2); |
1046 | | /* t2 = ffc00 */ |
1047 | 8.37k | fe448_mul(t1, t2, t1); |
1048 | | /* t1 = fffff */ |
1049 | 41.8k | fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2); |
1050 | | /* t2 = 1ffffe0 */ |
1051 | 8.37k | fe448_mul(t1, t2, t4); |
1052 | | /* t1 = 1ffffff */ |
1053 | 209k | fe448_sqr(t2, t1); for (i = 1; i < 25; ++i) fe448_sqr(t2, t2); |
1054 | | /* t2 = 3fffffe000000 */ |
1055 | 8.37k | fe448_mul(t1, t2, t1); |
1056 | | /* t1 = 3ffffffffffff */ |
1057 | 41.8k | fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2); |
1058 | | /* t2 = 7fffffffffffe0 */ |
1059 | 8.37k | fe448_mul(t1, t2, t4); |
1060 | | /* t1 = 7fffffffffffff */ |
1061 | 460k | fe448_sqr(t2, t1); for (i = 1; i < 55; ++i) fe448_sqr(t2, t2); |
1062 | | /* t2 = 3fffffffffffff80000000000000 */ |
1063 | 8.37k | fe448_mul(t1, t2, t1); |
1064 | | /* t1 = 3fffffffffffffffffffffffffff */ |
1065 | 921k | fe448_sqr(t2, t1); for (i = 1; i < 110; ++i) fe448_sqr(t2, t2); |
1066 | | /* t2 = fffffffffffffffffffffffffffc000000000000000000000000000 */ |
1067 | 8.37k | fe448_mul(t1, t2, t1); |
1068 | | /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff */ |
1069 | 33.4k | fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2); |
1070 | | /* t2 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff0 */ |
1071 | 8.37k | fe448_mul(t3, t3, t2); |
1072 | | /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffd */ |
1073 | 8.37k | fe448_mul(t1, t3, a); |
1074 | | /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */ |
1075 | 1.87M | fe448_sqr(t1, t1); for (i = 1; i < 224; ++i) fe448_sqr(t1, t1); |
1076 | | /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000000000000000000000000000000000 */ |
1077 | 8.37k | fe448_mul(r, t3, t1); |
1078 | | /* r = fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd */ |
1079 | 8.37k | } |
1080 | | |
1081 | | /* Scalar multiply the point by a number. r = n.a |
1082 | | * Uses Montgomery ladder and only requires the x-ordinate. |
1083 | | * |
1084 | | * r [in] Field element to hold result. |
1085 | | * n [in] Scalar as an array of bytes. |
1086 | | * a [in] Point to multiply - x-ordinate only. |
1087 | | */ |
1088 | | int curve448(byte* r, const byte* n, const byte* a) |
1089 | 1.60k | { |
1090 | 1.60k | sword64 x1[8]; |
1091 | 1.60k | sword64 x2[8]; |
1092 | 1.60k | sword64 z2[8]; |
1093 | 1.60k | sword64 x3[8]; |
1094 | 1.60k | sword64 z3[8]; |
1095 | 1.60k | sword64 t0[8]; |
1096 | 1.60k | sword64 t1[8]; |
1097 | 1.60k | int i; |
1098 | 1.60k | unsigned int swap; |
1099 | | |
1100 | 1.60k | fe448_from_bytes(x1, a); |
1101 | 1.60k | fe448_1(x2); |
1102 | 1.60k | fe448_0(z2); |
1103 | 1.60k | fe448_copy(x3, x1); |
1104 | 1.60k | fe448_1(z3); |
1105 | | |
1106 | 1.60k | swap = 0; |
1107 | 720k | for (i = 447; i >= 0; --i) { |
1108 | 719k | unsigned int b = (n[i >> 3] >> (i & 7)) & 1; |
1109 | 719k | swap ^= b; |
1110 | 719k | fe448_cswap(x2, x3, (int)swap); |
1111 | 719k | fe448_cswap(z2, z3, (int)swap); |
1112 | 719k | swap = b; |
1113 | | |
1114 | | /* Montgomery Ladder - double and add */ |
1115 | 719k | fe448_add(t0, x2, z2); |
1116 | 719k | fe448_reduce(t0); |
1117 | 719k | fe448_add(t1, x3, z3); |
1118 | 719k | fe448_reduce(t1); |
1119 | 719k | fe448_sub(x2, x2, z2); |
1120 | 719k | fe448_sub(x3, x3, z3); |
1121 | 719k | fe448_mul(t1, t1, x2); |
1122 | 719k | fe448_mul(z3, x3, t0); |
1123 | 719k | fe448_sqr(t0, t0); |
1124 | 719k | fe448_sqr(x2, x2); |
1125 | 719k | fe448_add(x3, z3, t1); |
1126 | 719k | fe448_reduce(x3); |
1127 | 719k | fe448_sqr(x3, x3); |
1128 | 719k | fe448_sub(z3, z3, t1); |
1129 | 719k | fe448_sqr(z3, z3); |
1130 | 719k | fe448_mul(z3, z3, x1); |
1131 | 719k | fe448_sub(t1, t0, x2); |
1132 | 719k | fe448_mul(x2, t0, x2); |
1133 | 719k | fe448_mul39081(z2, t1); |
1134 | 719k | fe448_add(z2, t0, z2); |
1135 | 719k | fe448_mul(z2, z2, t1); |
1136 | 719k | } |
1137 | | /* Last two bits are 0 - no final swap check required. */ |
1138 | | |
1139 | 1.60k | fe448_invert(z2, z2); |
1140 | 1.60k | fe448_mul(x2, x2, z2); |
1141 | 1.60k | fe448_to_bytes(r, x2); |
1142 | | |
1143 | 1.60k | return 0; |
1144 | 1.60k | } |
1145 | | |
1146 | | #ifdef HAVE_ED448 |
1147 | | /* Check whether field element is not 0. |
1148 | | * Must convert to a normalized form before checking. |
1149 | | * |
1150 | | * a [in] Field element. |
1151 | | * returns 0 when zero, and any other value otherwise. |
1152 | | */ |
1153 | | int fe448_isnonzero(const sword64* a) |
1154 | 4.71k | { |
1155 | 4.71k | byte b[56]; |
1156 | 4.71k | int i; |
1157 | 4.71k | byte c = 0; |
1158 | 4.71k | fe448_to_bytes(b, a); |
1159 | 268k | for (i = 0; i < 56; i++) |
1160 | 263k | c |= b[i]; |
1161 | 4.71k | return c; |
1162 | 4.71k | } |
1163 | | |
1164 | | /* Check whether field element is negative. |
1165 | | * Must convert to a normalized form before checking. |
1166 | | * |
1167 | | * a [in] Field element. |
1168 | | * returns 1 when negative, and 0 otherwise. |
1169 | | */ |
1170 | | int fe448_isnegative(const sword64* a) |
1171 | 11.4k | { |
1172 | 11.4k | byte b[56]; |
1173 | 11.4k | fe448_to_bytes(b, a); |
1174 | 11.4k | return b[0] & 1; |
1175 | 11.4k | } |
1176 | | |
1177 | | /* Negates the field element. r = -a |
1178 | | * |
1179 | | * r [in] Field element to hold result. |
1180 | | * a [in] Field element. |
1181 | | */ |
1182 | | void fe448_neg(sword64* r, const sword64* a) |
1183 | 574k | { |
1184 | 574k | r[0] = -a[0]; |
1185 | 574k | r[1] = -a[1]; |
1186 | 574k | r[2] = -a[2]; |
1187 | 574k | r[3] = -a[3]; |
1188 | 574k | r[4] = -a[4]; |
1189 | 574k | r[5] = -a[5]; |
1190 | 574k | r[6] = -a[6]; |
1191 | 574k | r[7] = -a[7]; |
1192 | 574k | } |
1193 | | |
1194 | | /* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1 |
1195 | | * Used for calculating y-ordinate from x-ordinate for Ed448. |
1196 | | * |
1197 | | * r [in] Field element to hold result. |
1198 | | * a [in] Field element to exponentiate. |
1199 | | */ |
1200 | | void fe448_pow_2_446_222_1(sword64* r, const sword64* a) |
1201 | 4.71k | { |
1202 | 4.71k | sword64 t1[8]; |
1203 | 4.71k | sword64 t2[8]; |
1204 | 4.71k | sword64 t3[8]; |
1205 | 4.71k | sword64 t4[8]; |
1206 | 4.71k | sword64 t5[8]; |
1207 | 4.71k | int i; |
1208 | | |
1209 | 4.71k | fe448_sqr(t3, a); |
1210 | | /* t3 = 2 */ |
1211 | 4.71k | fe448_mul(t1, t3, a); |
1212 | | /* t1 = 3 */ |
1213 | 4.71k | fe448_sqr(t5, t1); |
1214 | | /* t5 = 6 */ |
1215 | 4.71k | fe448_mul(t5, t5, a); |
1216 | | /* t5 = 7 */ |
1217 | 9.42k | fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2); |
1218 | | /* t2 = c */ |
1219 | 4.71k | fe448_mul(t3, t2, t3); |
1220 | | /* t3 = e */ |
1221 | 4.71k | fe448_mul(t1, t2, t1); |
1222 | | /* t1 = f */ |
1223 | 14.1k | fe448_sqr(t2, t1); for (i = 1; i < 3; ++i) fe448_sqr(t2, t2); |
1224 | | /* t2 = 78 */ |
1225 | 4.71k | fe448_mul(t5, t2, t5); |
1226 | | /* t5 = 7f */ |
1227 | 18.8k | fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2); |
1228 | | /* t2 = f0 */ |
1229 | 4.71k | fe448_mul(t1, t2, t1); |
1230 | | /* t1 = ff */ |
1231 | 4.71k | fe448_mul(t3, t3, t2); |
1232 | | /* t3 = fe */ |
1233 | 32.9k | fe448_sqr(t2, t1); for (i = 1; i < 7; ++i) fe448_sqr(t2, t2); |
1234 | | /* t2 = 7f80 */ |
1235 | 4.71k | fe448_mul(t5, t2, t5); |
1236 | | /* t5 = 7fff */ |
1237 | 37.7k | fe448_sqr(t2, t1); for (i = 1; i < 8; ++i) fe448_sqr(t2, t2); |
1238 | | /* t2 = ff00 */ |
1239 | 4.71k | fe448_mul(t1, t2, t1); |
1240 | | /* t1 = ffff */ |
1241 | 4.71k | fe448_mul(t3, t3, t2); |
1242 | | /* t3 = fffe */ |
1243 | 70.7k | fe448_sqr(t2, t5); for (i = 1; i < 15; ++i) fe448_sqr(t2, t2); |
1244 | | /* t2 = 3fff8000 */ |
1245 | 4.71k | fe448_mul(t5, t2, t5); |
1246 | | /* t5 = 3fffffff */ |
1247 | 75.4k | fe448_sqr(t2, t1); for (i = 1; i < 16; ++i) fe448_sqr(t2, t2); |
1248 | | /* t2 = ffff0000 */ |
1249 | 4.71k | fe448_mul(t1, t2, t1); |
1250 | | /* t1 = ffffffff */ |
1251 | 4.71k | fe448_mul(t3, t3, t2); |
1252 | | /* t3 = fffffffe */ |
1253 | 150k | fe448_sqr(t2, t1); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2); |
1254 | | /* t2 = ffffffff00000000 */ |
1255 | 4.71k | fe448_mul(t2, t2, t1); |
1256 | | /* t2 = ffffffffffffffff */ |
1257 | 301k | fe448_sqr(t1, t2); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1); |
1258 | | /* t1 = ffffffffffffffff0000000000000000 */ |
1259 | 4.71k | fe448_mul(t1, t1, t2); |
1260 | | /* t1 = ffffffffffffffffffffffffffffffff */ |
1261 | 301k | fe448_sqr(t1, t1); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1); |
1262 | | /* t1 = ffffffffffffffffffffffffffffffff0000000000000000 */ |
1263 | 4.71k | fe448_mul(t4, t1, t2); |
1264 | | /* t4 = ffffffffffffffffffffffffffffffffffffffffffffffff */ |
1265 | 150k | fe448_sqr(t2, t4); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2); |
1266 | | /* t2 = ffffffffffffffffffffffffffffffffffffffffffffffff00000000 */ |
1267 | 4.71k | fe448_mul(t3, t3, t2); |
1268 | | /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */ |
1269 | 905k | fe448_sqr(t1, t3); for (i = 1; i < 192; ++i) fe448_sqr(t1, t1); |
1270 | | /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe000000000000000000000000000000000000000000000000 */ |
1271 | 4.71k | fe448_mul(t1, t1, t4); |
1272 | | /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffff */ |
1273 | 141k | fe448_sqr(t1, t1); for (i = 1; i < 30; ++i) fe448_sqr(t1, t1); |
1274 | | /* t1 = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffc0000000 */ |
1275 | 4.71k | fe448_mul(r, t5, t1); |
1276 | | /* r = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff */ |
1277 | 4.71k | } |
1278 | | |
1279 | | /* Constant time, conditional move of b into a. |
1280 | | * a is not changed if the condition is 0. |
1281 | | * |
1282 | | * a A field element. |
1283 | | * b A field element. |
1284 | | * c If 1 then copy and if 0 then don't copy. |
1285 | | */ |
1286 | | void fe448_cmov(sword64* a, const sword64* b, int c) |
1287 | 9.64M | { |
1288 | 9.64M | sword64 m = -(sword64)c; |
1289 | 9.64M | sword64 t0 = m & (a[0] ^ b[0]); |
1290 | 9.64M | sword64 t1 = m & (a[1] ^ b[1]); |
1291 | 9.64M | sword64 t2 = m & (a[2] ^ b[2]); |
1292 | 9.64M | sword64 t3 = m & (a[3] ^ b[3]); |
1293 | 9.64M | sword64 t4 = m & (a[4] ^ b[4]); |
1294 | 9.64M | sword64 t5 = m & (a[5] ^ b[5]); |
1295 | 9.64M | sword64 t6 = m & (a[6] ^ b[6]); |
1296 | 9.64M | sword64 t7 = m & (a[7] ^ b[7]); |
1297 | | |
1298 | 9.64M | a[0] ^= t0; |
1299 | 9.64M | a[1] ^= t1; |
1300 | 9.64M | a[2] ^= t2; |
1301 | 9.64M | a[3] ^= t3; |
1302 | 9.64M | a[4] ^= t4; |
1303 | 9.64M | a[5] ^= t5; |
1304 | 9.64M | a[6] ^= t6; |
1305 | 9.64M | a[7] ^= t7; |
1306 | 9.64M | } |
1307 | | |
1308 | | #endif /* HAVE_ED448 */ |
1309 | | #else |
1310 | | |
1311 | | /* Initialize the field element operations. |
1312 | | */ |
1313 | | void fe448_init(void) |
1314 | | { |
1315 | | } |
1316 | | |
1317 | | /* Convert the field element from a byte array to an array of 28-bits. |
1318 | | * |
1319 | | * r [in] Array to encode into. |
1320 | | * b [in] Byte array. |
1321 | | */ |
1322 | | void fe448_from_bytes(sword32* r, const unsigned char* b) |
1323 | | { |
1324 | | r[ 0] = (sword32)( |
1325 | | (((word32)((b[ 0] ) >> 0)) << 0) |
1326 | | | (((word32)((b[ 1] ) >> 0)) << 8) |
1327 | | | (((word32)((b[ 2] ) >> 0)) << 16) |
1328 | | | ((((word32)((b[ 3] & 0xf )) >> 0)) << 24)); |
1329 | | r[ 1] = (sword32)( |
1330 | | (((word32)((b[ 3] ) >> 4)) << 0) |
1331 | | | (((word32)((b[ 4] ) >> 0)) << 4) |
1332 | | | (((word32)((b[ 5] ) >> 0)) << 12) |
1333 | | | (((word32)((b[ 6] ) >> 0)) << 20)); |
1334 | | r[ 2] = (sword32)( |
1335 | | (((word32)((b[ 7] ) >> 0)) << 0) |
1336 | | | (((word32)((b[ 8] ) >> 0)) << 8) |
1337 | | | (((word32)((b[ 9] ) >> 0)) << 16) |
1338 | | | ((((word32)((b[10] & 0xf )) >> 0)) << 24)); |
1339 | | r[ 3] = (sword32)( |
1340 | | (((word32)((b[10] ) >> 4)) << 0) |
1341 | | | (((word32)((b[11] ) >> 0)) << 4) |
1342 | | | (((word32)((b[12] ) >> 0)) << 12) |
1343 | | | (((word32)((b[13] ) >> 0)) << 20)); |
1344 | | r[ 4] = (sword32)( |
1345 | | (((word32)((b[14] ) >> 0)) << 0) |
1346 | | | (((word32)((b[15] ) >> 0)) << 8) |
1347 | | | (((word32)((b[16] ) >> 0)) << 16) |
1348 | | | ((((word32)((b[17] & 0xf )) >> 0)) << 24)); |
1349 | | r[ 5] = (sword32)( |
1350 | | (((word32)((b[17] ) >> 4)) << 0) |
1351 | | | (((word32)((b[18] ) >> 0)) << 4) |
1352 | | | (((word32)((b[19] ) >> 0)) << 12) |
1353 | | | (((word32)((b[20] ) >> 0)) << 20)); |
1354 | | r[ 6] = (sword32)( |
1355 | | (((word32)((b[21] ) >> 0)) << 0) |
1356 | | | (((word32)((b[22] ) >> 0)) << 8) |
1357 | | | (((word32)((b[23] ) >> 0)) << 16) |
1358 | | | ((((word32)((b[24] & 0xf )) >> 0)) << 24)); |
1359 | | r[ 7] = (sword32)( |
1360 | | (((word32)((b[24] ) >> 4)) << 0) |
1361 | | | (((word32)((b[25] ) >> 0)) << 4) |
1362 | | | (((word32)((b[26] ) >> 0)) << 12) |
1363 | | | (((word32)((b[27] ) >> 0)) << 20)); |
1364 | | r[ 8] = (sword32)( |
1365 | | (((word32)((b[28] ) >> 0)) << 0) |
1366 | | | (((word32)((b[29] ) >> 0)) << 8) |
1367 | | | (((word32)((b[30] ) >> 0)) << 16) |
1368 | | | ((((word32)((b[31] & 0xf )) >> 0)) << 24)); |
1369 | | r[ 9] = (sword32)( |
1370 | | (((word32)((b[31] ) >> 4)) << 0) |
1371 | | | (((word32)((b[32] ) >> 0)) << 4) |
1372 | | | (((word32)((b[33] ) >> 0)) << 12) |
1373 | | | (((word32)((b[34] ) >> 0)) << 20)); |
1374 | | r[10] = (sword32)( |
1375 | | (((word32)((b[35] ) >> 0)) << 0) |
1376 | | | (((word32)((b[36] ) >> 0)) << 8) |
1377 | | | (((word32)((b[37] ) >> 0)) << 16) |
1378 | | | ((((word32)((b[38] & 0xf )) >> 0)) << 24)); |
1379 | | r[11] = (sword32)( |
1380 | | (((word32)((b[38] ) >> 4)) << 0) |
1381 | | | (((word32)((b[39] ) >> 0)) << 4) |
1382 | | | (((word32)((b[40] ) >> 0)) << 12) |
1383 | | | (((word32)((b[41] ) >> 0)) << 20)); |
1384 | | r[12] = (sword32)( |
1385 | | (((word32)((b[42] ) >> 0)) << 0) |
1386 | | | (((word32)((b[43] ) >> 0)) << 8) |
1387 | | | (((word32)((b[44] ) >> 0)) << 16) |
1388 | | | ((((word32)((b[45] & 0xf )) >> 0)) << 24)); |
1389 | | r[13] = (sword32)( |
1390 | | (((word32)((b[45] ) >> 4)) << 0) |
1391 | | | (((word32)((b[46] ) >> 0)) << 4) |
1392 | | | (((word32)((b[47] ) >> 0)) << 12) |
1393 | | | (((word32)((b[48] ) >> 0)) << 20)); |
1394 | | r[14] = (sword32)( |
1395 | | (((word32)((b[49] ) >> 0)) << 0) |
1396 | | | (((word32)((b[50] ) >> 0)) << 8) |
1397 | | | (((word32)((b[51] ) >> 0)) << 16) |
1398 | | | ((((word32)((b[52] & 0xf )) >> 0)) << 24)); |
1399 | | r[15] = (sword32)( |
1400 | | (((word32)((b[52] ) >> 4)) << 0) |
1401 | | | (((word32)((b[53] ) >> 0)) << 4) |
1402 | | | (((word32)((b[54] ) >> 0)) << 12) |
1403 | | | (((word32)((b[55] ) >> 0)) << 20)); |
1404 | | } |
1405 | | |
1406 | | /* Convert the field element to a byte array from an array of 28-bits. |
1407 | | * |
1408 | | * b [in] Byte array. |
1409 | | * a [in] Array to encode into. |
1410 | | */ |
1411 | | void fe448_to_bytes(unsigned char* b, const sword32* a) |
1412 | | { |
1413 | | sword32 t; |
1414 | | /* Mod */ |
1415 | | sword32 in0 = a[0]; |
1416 | | sword32 in1 = a[1]; |
1417 | | sword32 in2 = a[2]; |
1418 | | sword32 in3 = a[3]; |
1419 | | sword32 in4 = a[4]; |
1420 | | sword32 in5 = a[5]; |
1421 | | sword32 in6 = a[6]; |
1422 | | sword32 in7 = a[7]; |
1423 | | sword32 in8 = a[8]; |
1424 | | sword32 in9 = a[9]; |
1425 | | sword32 in10 = a[10]; |
1426 | | sword32 in11 = a[11]; |
1427 | | sword32 in12 = a[12]; |
1428 | | sword32 in13 = a[13]; |
1429 | | sword32 in14 = a[14]; |
1430 | | sword32 in15 = a[15]; |
1431 | | sword32 o = in15 >> 28; |
1432 | | in15 -= (sword32)((word32)o << 28); |
1433 | | in0 += o; |
1434 | | in8 += o; |
1435 | | o = (in0 + 1) >> 28; |
1436 | | o = (o + in1) >> 28; |
1437 | | o = (o + in2) >> 28; |
1438 | | o = (o + in3) >> 28; |
1439 | | o = (o + in4) >> 28; |
1440 | | o = (o + in5) >> 28; |
1441 | | o = (o + in6) >> 28; |
1442 | | o = (o + in7) >> 28; |
1443 | | o = (o + in8 + 1) >> 28; |
1444 | | o = (o + in9) >> 28; |
1445 | | o = (o + in10) >> 28; |
1446 | | o = (o + in11) >> 28; |
1447 | | o = (o + in12) >> 28; |
1448 | | o = (o + in13) >> 28; |
1449 | | o = (o + in14) >> 28; |
1450 | | o = (o + in15) >> 28; |
1451 | | in0 += o; |
1452 | | in8 += o; |
1453 | | in15 -= (sword32)((word32)o << 28); |
1454 | | o = (sword32)(in0 >> 28); in1 += o; |
1455 | | t = (sword32)((word32)o << 28); in0 -= t; |
1456 | | o = (sword32)(in1 >> 28); in2 += o; |
1457 | | t = (sword32)((word32)o << 28); in1 -= t; |
1458 | | o = (sword32)(in2 >> 28); in3 += o; |
1459 | | t = (sword32)((word32)o << 28); in2 -= t; |
1460 | | o = (sword32)(in3 >> 28); in4 += o; |
1461 | | t = (sword32)((word32)o << 28); in3 -= t; |
1462 | | o = (sword32)(in4 >> 28); in5 += o; |
1463 | | t = (sword32)((word32)o << 28); in4 -= t; |
1464 | | o = (sword32)(in5 >> 28); in6 += o; |
1465 | | t = (sword32)((word32)o << 28); in5 -= t; |
1466 | | o = (sword32)(in6 >> 28); in7 += o; |
1467 | | t = (sword32)((word32)o << 28); in6 -= t; |
1468 | | o = (sword32)(in7 >> 28); in8 += o; |
1469 | | t = (sword32)((word32)o << 28); in7 -= t; |
1470 | | o = (sword32)(in8 >> 28); in9 += o; |
1471 | | t = (sword32)((word32)o << 28); in8 -= t; |
1472 | | o = (sword32)(in9 >> 28); in10 += o; |
1473 | | t = (sword32)((word32)o << 28); in9 -= t; |
1474 | | o = (sword32)(in10 >> 28); in11 += o; |
1475 | | t = (sword32)((word32)o << 28); in10 -= t; |
1476 | | o = (sword32)(in11 >> 28); in12 += o; |
1477 | | t = (sword32)((word32)o << 28); in11 -= t; |
1478 | | o = (sword32)(in12 >> 28); in13 += o; |
1479 | | t = (sword32)((word32)o << 28); in12 -= t; |
1480 | | o = (sword32)(in13 >> 28); in14 += o; |
1481 | | t = (sword32)((word32)o << 28); in13 -= t; |
1482 | | o = (sword32)(in14 >> 28); in15 += o; |
1483 | | t = (sword32)((word32)o << 28); in14 -= t; |
1484 | | o = (sword32)(in15 >> 28); in0 += o; |
1485 | | in8 += o; |
1486 | | t = (sword32)((word32)o << 28); in15 -= t; |
1487 | | |
1488 | | /* Output as bytes */ |
1489 | | b[ 0] = (byte)(in0 >> 0); |
1490 | | b[ 1] = (byte)(in0 >> 8); |
1491 | | b[ 2] = (byte)(in0 >> 16); |
1492 | | b[ 3] = (byte)((byte)(in0 >> 24) + (byte)((byte)(in1 >> 0) << 4)); |
1493 | | b[ 4] = (byte)(in1 >> 4); |
1494 | | b[ 5] = (byte)(in1 >> 12); |
1495 | | b[ 6] = (byte)(in1 >> 20); |
1496 | | b[ 7] = (byte)(in2 >> 0); |
1497 | | b[ 8] = (byte)(in2 >> 8); |
1498 | | b[ 9] = (byte)(in2 >> 16); |
1499 | | b[10] = (byte)((byte)(in2 >> 24) + (byte)((byte)(in3 >> 0) << 4)); |
1500 | | b[11] = (byte)(in3 >> 4); |
1501 | | b[12] = (byte)(in3 >> 12); |
1502 | | b[13] = (byte)(in3 >> 20); |
1503 | | b[14] = (byte)(in4 >> 0); |
1504 | | b[15] = (byte)(in4 >> 8); |
1505 | | b[16] = (byte)(in4 >> 16); |
1506 | | b[17] = (byte)((byte)(in4 >> 24) + (byte)((byte)(in5 >> 0) << 4)); |
1507 | | b[18] = (byte)(in5 >> 4); |
1508 | | b[19] = (byte)(in5 >> 12); |
1509 | | b[20] = (byte)(in5 >> 20); |
1510 | | b[21] = (byte)(in6 >> 0); |
1511 | | b[22] = (byte)(in6 >> 8); |
1512 | | b[23] = (byte)(in6 >> 16); |
1513 | | b[24] = (byte)((byte)(in6 >> 24) + (byte)((byte)(in7 >> 0) << 4)); |
1514 | | b[25] = (byte)(in7 >> 4); |
1515 | | b[26] = (byte)(in7 >> 12); |
1516 | | b[27] = (byte)(in7 >> 20); |
1517 | | b[28] = (byte)(in8 >> 0); |
1518 | | b[29] = (byte)(in8 >> 8); |
1519 | | b[30] = (byte)(in8 >> 16); |
1520 | | b[31] = (byte)((byte)(in8 >> 24) + (byte)((byte)(in9 >> 0) << 4)); |
1521 | | b[32] = (byte)(in9 >> 4); |
1522 | | b[33] = (byte)(in9 >> 12); |
1523 | | b[34] = (byte)(in9 >> 20); |
1524 | | b[35] = (byte)(in10 >> 0); |
1525 | | b[36] = (byte)(in10 >> 8); |
1526 | | b[37] = (byte)(in10 >> 16); |
1527 | | b[38] = (byte)((byte)(in10 >> 24) + (byte)((byte)(in11 >> 0) << 4)); |
1528 | | b[39] = (byte)(in11 >> 4); |
1529 | | b[40] = (byte)(in11 >> 12); |
1530 | | b[41] = (byte)(in11 >> 20); |
1531 | | b[42] = (byte)(in12 >> 0); |
1532 | | b[43] = (byte)(in12 >> 8); |
1533 | | b[44] = (byte)(in12 >> 16); |
1534 | | b[45] = (byte)((byte)(in12 >> 24) + (byte)((byte)(in13 >> 0) << 4)); |
1535 | | b[46] = (byte)(in13 >> 4); |
1536 | | b[47] = (byte)(in13 >> 12); |
1537 | | b[48] = (byte)(in13 >> 20); |
1538 | | b[49] = (byte)(in14 >> 0); |
1539 | | b[50] = (byte)(in14 >> 8); |
1540 | | b[51] = (byte)(in14 >> 16); |
1541 | | b[52] = (byte)((byte)(in14 >> 24) + (byte)((byte)(in15 >> 0) << 4)); |
1542 | | b[53] = (byte)(in15 >> 4); |
1543 | | b[54] = (byte)(in15 >> 12); |
1544 | | b[55] = (byte)(in15 >> 20); |
1545 | | } |
1546 | | |
1547 | | /* Set the field element to 0. |
1548 | | * |
1549 | | * a [in] Field element. |
1550 | | */ |
1551 | | void fe448_1(sword32* a) |
1552 | | { |
1553 | | a[0] = 1; |
1554 | | a[1] = 0; |
1555 | | a[2] = 0; |
1556 | | a[3] = 0; |
1557 | | a[4] = 0; |
1558 | | a[5] = 0; |
1559 | | a[6] = 0; |
1560 | | a[7] = 0; |
1561 | | a[8] = 0; |
1562 | | a[9] = 0; |
1563 | | a[10] = 0; |
1564 | | a[11] = 0; |
1565 | | a[12] = 0; |
1566 | | a[13] = 0; |
1567 | | a[14] = 0; |
1568 | | a[15] = 0; |
1569 | | } |
1570 | | |
1571 | | /* Set the field element to 0. |
1572 | | * |
1573 | | * a [in] Field element. |
1574 | | */ |
1575 | | void fe448_0(sword32* a) |
1576 | | { |
1577 | | a[0] = 0; |
1578 | | a[1] = 0; |
1579 | | a[2] = 0; |
1580 | | a[3] = 0; |
1581 | | a[4] = 0; |
1582 | | a[5] = 0; |
1583 | | a[6] = 0; |
1584 | | a[7] = 0; |
1585 | | a[8] = 0; |
1586 | | a[9] = 0; |
1587 | | a[10] = 0; |
1588 | | a[11] = 0; |
1589 | | a[12] = 0; |
1590 | | a[13] = 0; |
1591 | | a[14] = 0; |
1592 | | a[15] = 0; |
1593 | | } |
1594 | | |
1595 | | /* Copy one field element into another: d = a. |
1596 | | * |
1597 | | * d [in] Destination field element. |
1598 | | * a [in] Source field element. |
1599 | | */ |
1600 | | void fe448_copy(sword32* d, const sword32* a) |
1601 | | { |
1602 | | d[0] = a[0]; |
1603 | | d[1] = a[1]; |
1604 | | d[2] = a[2]; |
1605 | | d[3] = a[3]; |
1606 | | d[4] = a[4]; |
1607 | | d[5] = a[5]; |
1608 | | d[6] = a[6]; |
1609 | | d[7] = a[7]; |
1610 | | d[8] = a[8]; |
1611 | | d[9] = a[9]; |
1612 | | d[10] = a[10]; |
1613 | | d[11] = a[11]; |
1614 | | d[12] = a[12]; |
1615 | | d[13] = a[13]; |
1616 | | d[14] = a[14]; |
1617 | | d[15] = a[15]; |
1618 | | } |
1619 | | |
1620 | | /* Conditionally swap the elements. |
1621 | | * Constant time implementation. |
1622 | | * |
1623 | | * a [in] First field element. |
1624 | | * b [in] Second field element. |
1625 | | * c [in] Swap when 1. Valid values: 0, 1. |
1626 | | */ |
1627 | | static void fe448_cswap(sword32* a, sword32* b, int c) |
1628 | | { |
1629 | | sword32 mask = -(sword32)c; |
1630 | | sword32 t0 = (a[0] ^ b[0]) & mask; |
1631 | | sword32 t1 = (a[1] ^ b[1]) & mask; |
1632 | | sword32 t2 = (a[2] ^ b[2]) & mask; |
1633 | | sword32 t3 = (a[3] ^ b[3]) & mask; |
1634 | | sword32 t4 = (a[4] ^ b[4]) & mask; |
1635 | | sword32 t5 = (a[5] ^ b[5]) & mask; |
1636 | | sword32 t6 = (a[6] ^ b[6]) & mask; |
1637 | | sword32 t7 = (a[7] ^ b[7]) & mask; |
1638 | | sword32 t8 = (a[8] ^ b[8]) & mask; |
1639 | | sword32 t9 = (a[9] ^ b[9]) & mask; |
1640 | | sword32 t10 = (a[10] ^ b[10]) & mask; |
1641 | | sword32 t11 = (a[11] ^ b[11]) & mask; |
1642 | | sword32 t12 = (a[12] ^ b[12]) & mask; |
1643 | | sword32 t13 = (a[13] ^ b[13]) & mask; |
1644 | | sword32 t14 = (a[14] ^ b[14]) & mask; |
1645 | | sword32 t15 = (a[15] ^ b[15]) & mask; |
1646 | | a[0] ^= t0; |
1647 | | a[1] ^= t1; |
1648 | | a[2] ^= t2; |
1649 | | a[3] ^= t3; |
1650 | | a[4] ^= t4; |
1651 | | a[5] ^= t5; |
1652 | | a[6] ^= t6; |
1653 | | a[7] ^= t7; |
1654 | | a[8] ^= t8; |
1655 | | a[9] ^= t9; |
1656 | | a[10] ^= t10; |
1657 | | a[11] ^= t11; |
1658 | | a[12] ^= t12; |
1659 | | a[13] ^= t13; |
1660 | | a[14] ^= t14; |
1661 | | a[15] ^= t15; |
1662 | | b[0] ^= t0; |
1663 | | b[1] ^= t1; |
1664 | | b[2] ^= t2; |
1665 | | b[3] ^= t3; |
1666 | | b[4] ^= t4; |
1667 | | b[5] ^= t5; |
1668 | | b[6] ^= t6; |
1669 | | b[7] ^= t7; |
1670 | | b[8] ^= t8; |
1671 | | b[9] ^= t9; |
1672 | | b[10] ^= t10; |
1673 | | b[11] ^= t11; |
1674 | | b[12] ^= t12; |
1675 | | b[13] ^= t13; |
1676 | | b[14] ^= t14; |
1677 | | b[15] ^= t15; |
1678 | | } |
1679 | | |
1680 | | /* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1) |
1681 | | * |
1682 | | * r [in] Field element to hold sum. |
1683 | | * a [in] Field element to add. |
1684 | | * b [in] Field element to add. |
1685 | | */ |
1686 | | void fe448_add(sword32* r, const sword32* a, const sword32* b) |
1687 | | { |
1688 | | r[0] = a[0] + b[0]; |
1689 | | r[1] = a[1] + b[1]; |
1690 | | r[2] = a[2] + b[2]; |
1691 | | r[3] = a[3] + b[3]; |
1692 | | r[4] = a[4] + b[4]; |
1693 | | r[5] = a[5] + b[5]; |
1694 | | r[6] = a[6] + b[6]; |
1695 | | r[7] = a[7] + b[7]; |
1696 | | r[8] = a[8] + b[8]; |
1697 | | r[9] = a[9] + b[9]; |
1698 | | r[10] = a[10] + b[10]; |
1699 | | r[11] = a[11] + b[11]; |
1700 | | r[12] = a[12] + b[12]; |
1701 | | r[13] = a[13] + b[13]; |
1702 | | r[14] = a[14] + b[14]; |
1703 | | r[15] = a[15] + b[15]; |
1704 | | } |
1705 | | |
1706 | | /* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1) |
1707 | | * |
1708 | | * r [in] Field element to hold difference. |
1709 | | * a [in] Field element to subtract from. |
1710 | | * b [in] Field element to subtract. |
1711 | | */ |
1712 | | void fe448_sub(sword32* r, const sword32* a, const sword32* b) |
1713 | | { |
1714 | | r[0] = a[0] - b[0]; |
1715 | | r[1] = a[1] - b[1]; |
1716 | | r[2] = a[2] - b[2]; |
1717 | | r[3] = a[3] - b[3]; |
1718 | | r[4] = a[4] - b[4]; |
1719 | | r[5] = a[5] - b[5]; |
1720 | | r[6] = a[6] - b[6]; |
1721 | | r[7] = a[7] - b[7]; |
1722 | | r[8] = a[8] - b[8]; |
1723 | | r[9] = a[9] - b[9]; |
1724 | | r[10] = a[10] - b[10]; |
1725 | | r[11] = a[11] - b[11]; |
1726 | | r[12] = a[12] - b[12]; |
1727 | | r[13] = a[13] - b[13]; |
1728 | | r[14] = a[14] - b[14]; |
1729 | | r[15] = a[15] - b[15]; |
1730 | | } |
1731 | | |
1732 | | void fe448_reduce(sword32* a) |
1733 | | { |
1734 | | sword64 o; |
1735 | | |
1736 | | o = a[0 ] >> 28; a[1 ] += (sword32)o; a[0 ] -= (sword32)((word32)o << 28); |
1737 | | o = a[1 ] >> 28; a[2 ] += (sword32)o; a[1 ] -= (sword32)((word32)o << 28); |
1738 | | o = a[2 ] >> 28; a[3 ] += (sword32)o; a[2 ] -= (sword32)((word32)o << 28); |
1739 | | o = a[3 ] >> 28; a[4 ] += (sword32)o; a[3 ] -= (sword32)((word32)o << 28); |
1740 | | o = a[4 ] >> 28; a[5 ] += (sword32)o; a[4 ] -= (sword32)((word32)o << 28); |
1741 | | o = a[5 ] >> 28; a[6 ] += (sword32)o; a[5 ] -= (sword32)((word32)o << 28); |
1742 | | o = a[6 ] >> 28; a[7 ] += (sword32)o; a[6 ] -= (sword32)((word32)o << 28); |
1743 | | o = a[7 ] >> 28; a[8 ] += (sword32)o; a[7 ] -= (sword32)((word32)o << 28); |
1744 | | o = a[8 ] >> 28; a[9 ] += (sword32)o; a[8 ] -= (sword32)((word32)o << 28); |
1745 | | o = a[9 ] >> 28; a[10] += (sword32)o; a[9 ] -= (sword32)((word32)o << 28); |
1746 | | o = a[10] >> 28; a[11] += (sword32)o; a[10] -= (sword32)((word32)o << 28); |
1747 | | o = a[11] >> 28; a[12] += (sword32)o; a[11] -= (sword32)((word32)o << 28); |
1748 | | o = a[12] >> 28; a[13] += (sword32)o; a[12] -= (sword32)((word32)o << 28); |
1749 | | o = a[13] >> 28; a[14] += (sword32)o; a[13] -= (sword32)((word32)o << 28); |
1750 | | o = a[14] >> 28; a[15] += (sword32)o; a[14] -= (sword32)((word32)o << 28); |
1751 | | o = a[15] >> 28; a[0] += (sword32)o; |
1752 | | a[8] += (sword32)o; a[15] -= (sword32)((word32)o << 28); |
1753 | | } |
1754 | | /* Multiply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1) |
1755 | | * |
1756 | | * r [in] Field element to hold result. |
1757 | | * a [in] Field element to multiply. |
1758 | | */ |
1759 | | void fe448_mul39081(sword32* r, const sword32* a) |
1760 | | { |
1761 | | sword64 t; |
1762 | | sword32 o; |
1763 | | sword64 t0 = a[0] * (sword64)39081; |
1764 | | sword64 t1 = a[1] * (sword64)39081; |
1765 | | sword64 t2 = a[2] * (sword64)39081; |
1766 | | sword64 t3 = a[3] * (sword64)39081; |
1767 | | sword64 t4 = a[4] * (sword64)39081; |
1768 | | sword64 t5 = a[5] * (sword64)39081; |
1769 | | sword64 t6 = a[6] * (sword64)39081; |
1770 | | sword64 t7 = a[7] * (sword64)39081; |
1771 | | sword64 t8 = a[8] * (sword64)39081; |
1772 | | sword64 t9 = a[9] * (sword64)39081; |
1773 | | sword64 t10 = a[10] * (sword64)39081; |
1774 | | sword64 t11 = a[11] * (sword64)39081; |
1775 | | sword64 t12 = a[12] * (sword64)39081; |
1776 | | sword64 t13 = a[13] * (sword64)39081; |
1777 | | sword64 t14 = a[14] * (sword64)39081; |
1778 | | sword64 t15 = a[15] * (sword64)39081; |
1779 | | o = (sword32)(t0 >> 28); t1 += o; |
1780 | | t = (sword64)((word64)o << 28); t0 -= t; |
1781 | | o = (sword32)(t1 >> 28); t2 += o; |
1782 | | t = (sword64)((word64)o << 28); t1 -= t; |
1783 | | o = (sword32)(t2 >> 28); t3 += o; |
1784 | | t = (sword64)((word64)o << 28); t2 -= t; |
1785 | | o = (sword32)(t3 >> 28); t4 += o; |
1786 | | t = (sword64)((word64)o << 28); t3 -= t; |
1787 | | o = (sword32)(t4 >> 28); t5 += o; |
1788 | | t = (sword64)((word64)o << 28); t4 -= t; |
1789 | | o = (sword32)(t5 >> 28); t6 += o; |
1790 | | t = (sword64)((word64)o << 28); t5 -= t; |
1791 | | o = (sword32)(t6 >> 28); t7 += o; |
1792 | | t = (sword64)((word64)o << 28); t6 -= t; |
1793 | | o = (sword32)(t7 >> 28); t8 += o; |
1794 | | t = (sword64)((word64)o << 28); t7 -= t; |
1795 | | o = (sword32)(t8 >> 28); t9 += o; |
1796 | | t = (sword64)((word64)o << 28); t8 -= t; |
1797 | | o = (sword32)(t9 >> 28); t10 += o; |
1798 | | t = (sword64)((word64)o << 28); t9 -= t; |
1799 | | o = (sword32)(t10 >> 28); t11 += o; |
1800 | | t = (sword64)((word64)o << 28); t10 -= t; |
1801 | | o = (sword32)(t11 >> 28); t12 += o; |
1802 | | t = (sword64)((word64)o << 28); t11 -= t; |
1803 | | o = (sword32)(t12 >> 28); t13 += o; |
1804 | | t = (sword64)((word64)o << 28); t12 -= t; |
1805 | | o = (sword32)(t13 >> 28); t14 += o; |
1806 | | t = (sword64)((word64)o << 28); t13 -= t; |
1807 | | o = (sword32)(t14 >> 28); t15 += o; |
1808 | | t = (sword64)((word64)o << 28); t14 -= t; |
1809 | | o = (sword32)(t15 >> 28); t0 += o; |
1810 | | t8 += o; |
1811 | | t = (sword64)((word64)o << 28); t15 -= t; |
1812 | | |
1813 | | /* Store */ |
1814 | | r[0] = (sword32)t0; |
1815 | | r[1] = (sword32)t1; |
1816 | | r[2] = (sword32)t2; |
1817 | | r[3] = (sword32)t3; |
1818 | | r[4] = (sword32)t4; |
1819 | | r[5] = (sword32)t5; |
1820 | | r[6] = (sword32)t6; |
1821 | | r[7] = (sword32)t7; |
1822 | | r[8] = (sword32)t8; |
1823 | | r[9] = (sword32)t9; |
1824 | | r[10] = (sword32)t10; |
1825 | | r[11] = (sword32)t11; |
1826 | | r[12] = (sword32)t12; |
1827 | | r[13] = (sword32)t13; |
1828 | | r[14] = (sword32)t14; |
1829 | | r[15] = (sword32)t15; |
1830 | | } |
1831 | | |
1832 | | /* Multiply two field elements. r = a * b |
1833 | | * |
1834 | | * r [in] Field element to hold result. |
1835 | | * a [in] Field element to multiply. |
1836 | | * b [in] Field element to multiply. |
1837 | | */ |
1838 | | static WC_INLINE void fe448_mul_8(sword32* r, const sword32* a, |
1839 | | const sword32* b) |
1840 | | { |
1841 | | sword64 t; |
1842 | | sword64 o; |
1843 | | sword64 t15; |
1844 | | sword64 t0 = (sword64)a[ 0] * b[ 0]; |
1845 | | sword64 t1 = (sword64)a[ 0] * b[ 1]; |
1846 | | sword64 t101 = (sword64)a[ 1] * b[ 0]; |
1847 | | sword64 t2 = (sword64)a[ 0] * b[ 2]; |
1848 | | sword64 t102 = (sword64)a[ 1] * b[ 1]; |
1849 | | sword64 t202 = (sword64)a[ 2] * b[ 0]; |
1850 | | sword64 t3 = (sword64)a[ 0] * b[ 3]; |
1851 | | sword64 t103 = (sword64)a[ 1] * b[ 2]; |
1852 | | sword64 t203 = (sword64)a[ 2] * b[ 1]; |
1853 | | sword64 t303 = (sword64)a[ 3] * b[ 0]; |
1854 | | sword64 t4 = (sword64)a[ 0] * b[ 4]; |
1855 | | sword64 t104 = (sword64)a[ 1] * b[ 3]; |
1856 | | sword64 t204 = (sword64)a[ 2] * b[ 2]; |
1857 | | sword64 t304 = (sword64)a[ 3] * b[ 1]; |
1858 | | sword64 t404 = (sword64)a[ 4] * b[ 0]; |
1859 | | sword64 t5 = (sword64)a[ 0] * b[ 5]; |
1860 | | sword64 t105 = (sword64)a[ 1] * b[ 4]; |
1861 | | sword64 t205 = (sword64)a[ 2] * b[ 3]; |
1862 | | sword64 t305 = (sword64)a[ 3] * b[ 2]; |
1863 | | sword64 t405 = (sword64)a[ 4] * b[ 1]; |
1864 | | sword64 t505 = (sword64)a[ 5] * b[ 0]; |
1865 | | sword64 t6 = (sword64)a[ 0] * b[ 6]; |
1866 | | sword64 t106 = (sword64)a[ 1] * b[ 5]; |
1867 | | sword64 t206 = (sword64)a[ 2] * b[ 4]; |
1868 | | sword64 t306 = (sword64)a[ 3] * b[ 3]; |
1869 | | sword64 t406 = (sword64)a[ 4] * b[ 2]; |
1870 | | sword64 t506 = (sword64)a[ 5] * b[ 1]; |
1871 | | sword64 t606 = (sword64)a[ 6] * b[ 0]; |
1872 | | sword64 t7 = (sword64)a[ 0] * b[ 7]; |
1873 | | sword64 t107 = (sword64)a[ 1] * b[ 6]; |
1874 | | sword64 t207 = (sword64)a[ 2] * b[ 5]; |
1875 | | sword64 t307 = (sword64)a[ 3] * b[ 4]; |
1876 | | sword64 t407 = (sword64)a[ 4] * b[ 3]; |
1877 | | sword64 t507 = (sword64)a[ 5] * b[ 2]; |
1878 | | sword64 t607 = (sword64)a[ 6] * b[ 1]; |
1879 | | sword64 t707 = (sword64)a[ 7] * b[ 0]; |
1880 | | sword64 t8 = (sword64)a[ 1] * b[ 7]; |
1881 | | sword64 t108 = (sword64)a[ 2] * b[ 6]; |
1882 | | sword64 t208 = (sword64)a[ 3] * b[ 5]; |
1883 | | sword64 t308 = (sword64)a[ 4] * b[ 4]; |
1884 | | sword64 t408 = (sword64)a[ 5] * b[ 3]; |
1885 | | sword64 t508 = (sword64)a[ 6] * b[ 2]; |
1886 | | sword64 t608 = (sword64)a[ 7] * b[ 1]; |
1887 | | sword64 t9 = (sword64)a[ 2] * b[ 7]; |
1888 | | sword64 t109 = (sword64)a[ 3] * b[ 6]; |
1889 | | sword64 t209 = (sword64)a[ 4] * b[ 5]; |
1890 | | sword64 t309 = (sword64)a[ 5] * b[ 4]; |
1891 | | sword64 t409 = (sword64)a[ 6] * b[ 3]; |
1892 | | sword64 t509 = (sword64)a[ 7] * b[ 2]; |
1893 | | sword64 t10 = (sword64)a[ 3] * b[ 7]; |
1894 | | sword64 t110 = (sword64)a[ 4] * b[ 6]; |
1895 | | sword64 t210 = (sword64)a[ 5] * b[ 5]; |
1896 | | sword64 t310 = (sword64)a[ 6] * b[ 4]; |
1897 | | sword64 t410 = (sword64)a[ 7] * b[ 3]; |
1898 | | sword64 t11 = (sword64)a[ 4] * b[ 7]; |
1899 | | sword64 t111 = (sword64)a[ 5] * b[ 6]; |
1900 | | sword64 t211 = (sword64)a[ 6] * b[ 5]; |
1901 | | sword64 t311 = (sword64)a[ 7] * b[ 4]; |
1902 | | sword64 t12 = (sword64)a[ 5] * b[ 7]; |
1903 | | sword64 t112 = (sword64)a[ 6] * b[ 6]; |
1904 | | sword64 t212 = (sword64)a[ 7] * b[ 5]; |
1905 | | sword64 t13 = (sword64)a[ 6] * b[ 7]; |
1906 | | sword64 t113 = (sword64)a[ 7] * b[ 6]; |
1907 | | sword64 t14 = (sword64)a[ 7] * b[ 7]; |
1908 | | t1 += t101; |
1909 | | t2 += t102; t2 += t202; |
1910 | | t3 += t103; t3 += t203; t3 += t303; |
1911 | | t4 += t104; t4 += t204; t4 += t304; t4 += t404; |
1912 | | t5 += t105; t5 += t205; t5 += t305; t5 += t405; t5 += t505; |
1913 | | t6 += t106; t6 += t206; t6 += t306; t6 += t406; t6 += t506; |
1914 | | t6 += t606; |
1915 | | t7 += t107; t7 += t207; t7 += t307; t7 += t407; t7 += t507; |
1916 | | t7 += t607; |
1917 | | t8 += t108; t8 += t208; t8 += t308; t8 += t408; t8 += t508; |
1918 | | t8 += t608; |
1919 | | t9 += t109; t9 += t209; t9 += t309; t9 += t409; t9 += t509; |
1920 | | t10 += t110; t10 += t210; t10 += t310; t10 += t410; |
1921 | | t11 += t111; t11 += t211; t11 += t311; |
1922 | | t12 += t112; t12 += t212; |
1923 | | t13 += t113; |
1924 | | o = t14 >> 28; |
1925 | | t15 = o; |
1926 | | t14 -= (sword64)((word64)o << 28); |
1927 | | o = (t0 >> 28); t1 += o; |
1928 | | t = (sword64)((word64)o << 28); t0 -= t; |
1929 | | o = (t1 >> 28); t2 += o; |
1930 | | t = (sword64)((word64)o << 28); t1 -= t; |
1931 | | o = (t2 >> 28); t3 += o; |
1932 | | t = (sword64)((word64)o << 28); t2 -= t; |
1933 | | o = (t3 >> 28); t4 += o; |
1934 | | t = (sword64)((word64)o << 28); t3 -= t; |
1935 | | o = (t4 >> 28); t5 += o; |
1936 | | t = (sword64)((word64)o << 28); t4 -= t; |
1937 | | o = (t5 >> 28); t6 += o; |
1938 | | t = (sword64)((word64)o << 28); t5 -= t; |
1939 | | o = (t6 >> 28); t7 += o; |
1940 | | t = (sword64)((word64)o << 28); t6 -= t; |
1941 | | o = (t707>> 28); t8 += o; |
1942 | | t = (sword64)((word64)o << 28); t707-= t; t7 += t707; |
1943 | | o = (t7 >> 28); t8 += o; |
1944 | | t = (sword64)((word64)o << 28); t7 -= t; |
1945 | | o = (t8 >> 28); t9 += o; |
1946 | | t = (sword64)((word64)o << 28); t8 -= t; |
1947 | | o = (t9 >> 28); t10 += o; |
1948 | | t = (sword64)((word64)o << 28); t9 -= t; |
1949 | | o = (t10 >> 28); t11 += o; |
1950 | | t = (sword64)((word64)o << 28); t10 -= t; |
1951 | | o = (t11 >> 28); t12 += o; |
1952 | | t = (sword64)((word64)o << 28); t11 -= t; |
1953 | | o = (t12 >> 28); t13 += o; |
1954 | | t = (sword64)((word64)o << 28); t12 -= t; |
1955 | | o = (t13 >> 28); t14 += o; |
1956 | | t = (sword64)((word64)o << 28); t13 -= t; |
1957 | | o = (t14 >> 28); t15 += o; |
1958 | | t = (sword64)((word64)o << 28); t14 -= t; |
1959 | | o = (t15 >> 28); t0 += o; |
1960 | | t8 += o; |
1961 | | t = (sword64)((word64)o << 28); t15 -= t; |
1962 | | |
1963 | | /* Store */ |
1964 | | r[0] = (sword32)t0; |
1965 | | r[1] = (sword32)t1; |
1966 | | r[2] = (sword32)t2; |
1967 | | r[3] = (sword32)t3; |
1968 | | r[4] = (sword32)t4; |
1969 | | r[5] = (sword32)t5; |
1970 | | r[6] = (sword32)t6; |
1971 | | r[7] = (sword32)t7; |
1972 | | r[8] = (sword32)t8; |
1973 | | r[9] = (sword32)t9; |
1974 | | r[10] = (sword32)t10; |
1975 | | r[11] = (sword32)t11; |
1976 | | r[12] = (sword32)t12; |
1977 | | r[13] = (sword32)t13; |
1978 | | r[14] = (sword32)t14; |
1979 | | r[15] = (sword32)t15; |
1980 | | } |
1981 | | |
1982 | | /* Multiply two field elements. r = (a * b) mod (2^448 - 2^224 - 1) |
1983 | | * |
1984 | | * r [in] Field element to hold result. |
1985 | | * a [in] Field element to multiply. |
1986 | | * b [in] Field element to multiply. |
1987 | | */ |
1988 | | void fe448_mul(sword32* r, const sword32* a, const sword32* b) |
1989 | | { |
1990 | | sword32 r0[16]; |
1991 | | sword32 r1[16]; |
1992 | | sword32* a1 = r1; |
1993 | | sword32 b1[8]; |
1994 | | sword32 r2[16]; |
1995 | | a1[0] = a[0] + a[8]; |
1996 | | a1[1] = a[1] + a[9]; |
1997 | | a1[2] = a[2] + a[10]; |
1998 | | a1[3] = a[3] + a[11]; |
1999 | | a1[4] = a[4] + a[12]; |
2000 | | a1[5] = a[5] + a[13]; |
2001 | | a1[6] = a[6] + a[14]; |
2002 | | a1[7] = a[7] + a[15]; |
2003 | | b1[0] = b[0] + b[8]; |
2004 | | b1[1] = b[1] + b[9]; |
2005 | | b1[2] = b[2] + b[10]; |
2006 | | b1[3] = b[3] + b[11]; |
2007 | | b1[4] = b[4] + b[12]; |
2008 | | b1[5] = b[5] + b[13]; |
2009 | | b1[6] = b[6] + b[14]; |
2010 | | b1[7] = b[7] + b[15]; |
2011 | | fe448_mul_8(r2, a + 8, b + 8); |
2012 | | fe448_mul_8(r0, a, b); |
2013 | | fe448_mul_8(r1, a1, b1); |
2014 | | r[ 0] = r0[ 0] + r2[ 0] + r1[ 8] - r0[ 8]; |
2015 | | r[ 1] = r0[ 1] + r2[ 1] + r1[ 9] - r0[ 9]; |
2016 | | r[ 2] = r0[ 2] + r2[ 2] + r1[10] - r0[10]; |
2017 | | r[ 3] = r0[ 3] + r2[ 3] + r1[11] - r0[11]; |
2018 | | r[ 4] = r0[ 4] + r2[ 4] + r1[12] - r0[12]; |
2019 | | r[ 5] = r0[ 5] + r2[ 5] + r1[13] - r0[13]; |
2020 | | r[ 6] = r0[ 6] + r2[ 6] + r1[14] - r0[14]; |
2021 | | r[ 7] = r0[ 7] + r2[ 7] + r1[15] - r0[15]; |
2022 | | r[ 8] = r2[ 8] + r1[ 0] - r0[ 0] + r1[ 8]; |
2023 | | r[ 9] = r2[ 9] + r1[ 1] - r0[ 1] + r1[ 9]; |
2024 | | r[10] = r2[10] + r1[ 2] - r0[ 2] + r1[10]; |
2025 | | r[11] = r2[11] + r1[ 3] - r0[ 3] + r1[11]; |
2026 | | r[12] = r2[12] + r1[ 4] - r0[ 4] + r1[12]; |
2027 | | r[13] = r2[13] + r1[ 5] - r0[ 5] + r1[13]; |
2028 | | r[14] = r2[14] + r1[ 6] - r0[ 6] + r1[14]; |
2029 | | r[15] = r2[15] + r1[ 7] - r0[ 7] + r1[15]; |
2030 | | } |
2031 | | |
2032 | | /* Square a field element. r = a * a |
2033 | | * |
2034 | | * r [in] Field element to hold result. |
2035 | | * a [in] Field element to square. |
2036 | | */ |
2037 | | static WC_INLINE void fe448_sqr_8(sword32* r, const sword32* a) |
2038 | | { |
2039 | | sword64 o; |
2040 | | sword64 t15; |
2041 | | sword64 t; |
2042 | | sword64 t0 = (sword64)a[ 0] * a[ 0]; |
2043 | | sword64 t1 = 2 * (sword64)a[ 0] * a[ 1]; |
2044 | | sword64 t2 = 2 * (sword64)a[ 0] * a[ 2]; |
2045 | | sword64 t102 = (sword64)a[ 1] * a[ 1]; |
2046 | | sword64 t3 = 2 * (sword64)a[ 0] * a[ 3]; |
2047 | | sword64 t103 = 2 * (sword64)a[ 1] * a[ 2]; |
2048 | | sword64 t4 = 2 * (sword64)a[ 0] * a[ 4]; |
2049 | | sword64 t104 = 2 * (sword64)a[ 1] * a[ 3]; |
2050 | | sword64 t204 = (sword64)a[ 2] * a[ 2]; |
2051 | | sword64 t5 = 2 * (sword64)a[ 0] * a[ 5]; |
2052 | | sword64 t105 = 2 * (sword64)a[ 1] * a[ 4]; |
2053 | | sword64 t205 = 2 * (sword64)a[ 2] * a[ 3]; |
2054 | | sword64 t6 = 2 * (sword64)a[ 0] * a[ 6]; |
2055 | | sword64 t106 = 2 * (sword64)a[ 1] * a[ 5]; |
2056 | | sword64 t206 = 2 * (sword64)a[ 2] * a[ 4]; |
2057 | | sword64 t306 = (sword64)a[ 3] * a[ 3]; |
2058 | | sword64 t7 = 2 * (sword64)a[ 0] * a[ 7]; |
2059 | | sword64 t107 = 2 * (sword64)a[ 1] * a[ 6]; |
2060 | | sword64 t207 = 2 * (sword64)a[ 2] * a[ 5]; |
2061 | | sword64 t307 = 2 * (sword64)a[ 3] * a[ 4]; |
2062 | | sword64 t8 = 2 * (sword64)a[ 1] * a[ 7]; |
2063 | | sword64 t108 = 2 * (sword64)a[ 2] * a[ 6]; |
2064 | | sword64 t208 = 2 * (sword64)a[ 3] * a[ 5]; |
2065 | | sword64 t308 = (sword64)a[ 4] * a[ 4]; |
2066 | | sword64 t9 = 2 * (sword64)a[ 2] * a[ 7]; |
2067 | | sword64 t109 = 2 * (sword64)a[ 3] * a[ 6]; |
2068 | | sword64 t209 = 2 * (sword64)a[ 4] * a[ 5]; |
2069 | | sword64 t10 = 2 * (sword64)a[ 3] * a[ 7]; |
2070 | | sword64 t110 = 2 * (sword64)a[ 4] * a[ 6]; |
2071 | | sword64 t210 = (sword64)a[ 5] * a[ 5]; |
2072 | | sword64 t11 = 2 * (sword64)a[ 4] * a[ 7]; |
2073 | | sword64 t111 = 2 * (sword64)a[ 5] * a[ 6]; |
2074 | | sword64 t12 = 2 * (sword64)a[ 5] * a[ 7]; |
2075 | | sword64 t112 = (sword64)a[ 6] * a[ 6]; |
2076 | | sword64 t13 = 2 * (sword64)a[ 6] * a[ 7]; |
2077 | | sword64 t14 = (sword64)a[ 7] * a[ 7]; |
2078 | | t2 += t102; |
2079 | | t3 += t103; |
2080 | | t4 += t104; t4 += t204; |
2081 | | t5 += t105; t5 += t205; |
2082 | | t6 += t106; t6 += t206; t6 += t306; |
2083 | | t7 += t107; t7 += t207; |
2084 | | t8 += t108; t8 += t208; t8 += t308; |
2085 | | t9 += t109; t9 += t209; |
2086 | | t10 += t110; t10 += t210; |
2087 | | t11 += t111; |
2088 | | t12 += t112; |
2089 | | o = t14 >> 28; |
2090 | | t15 = o; |
2091 | | t14 -= (sword64)((word64)o << 28); |
2092 | | o = (t0 >> 28); t1 += o; |
2093 | | t = (sword64)((word64)o << 28); t0 -= t; |
2094 | | o = (t1 >> 28); t2 += o; |
2095 | | t = (sword64)((word64)o << 28); t1 -= t; |
2096 | | o = (t2 >> 28); t3 += o; |
2097 | | t = (sword64)((word64)o << 28); t2 -= t; |
2098 | | o = (t3 >> 28); t4 += o; |
2099 | | t = (sword64)((word64)o << 28); t3 -= t; |
2100 | | o = (t4 >> 28); t5 += o; |
2101 | | t = (sword64)((word64)o << 28); t4 -= t; |
2102 | | o = (t5 >> 28); t6 += o; |
2103 | | t = (sword64)((word64)o << 28); t5 -= t; |
2104 | | o = (t6 >> 28); t7 += o; |
2105 | | t = (sword64)((word64)o << 28); t6 -= t; |
2106 | | o = (t307>> 28); t8 += o; |
2107 | | t = (sword64)((word64)o << 28); t307-= t; t7 += t307; |
2108 | | o = (t7 >> 28); t8 += o; |
2109 | | t = (sword64)((word64)o << 28); t7 -= t; |
2110 | | o = (t8 >> 28); t9 += o; |
2111 | | t = (sword64)((word64)o << 28); t8 -= t; |
2112 | | o = (t9 >> 28); t10 += o; |
2113 | | t = (sword64)((word64)o << 28); t9 -= t; |
2114 | | o = (t10 >> 28); t11 += o; |
2115 | | t = (sword64)((word64)o << 28); t10 -= t; |
2116 | | o = (t11 >> 28); t12 += o; |
2117 | | t = (sword64)((word64)o << 28); t11 -= t; |
2118 | | o = (t12 >> 28); t13 += o; |
2119 | | t = (sword64)((word64)o << 28); t12 -= t; |
2120 | | o = (t13 >> 28); t14 += o; |
2121 | | t = (sword64)((word64)o << 28); t13 -= t; |
2122 | | o = (t14 >> 28); t15 += o; |
2123 | | t = (sword64)((word64)o << 28); t14 -= t; |
2124 | | o = (t15 >> 28); t0 += o; |
2125 | | t8 += o; |
2126 | | t = (sword64)((word64)o << 28); t15 -= t; |
2127 | | |
2128 | | /* Store */ |
2129 | | r[0] = (sword32)t0; |
2130 | | r[1] = (sword32)t1; |
2131 | | r[2] = (sword32)t2; |
2132 | | r[3] = (sword32)t3; |
2133 | | r[4] = (sword32)t4; |
2134 | | r[5] = (sword32)t5; |
2135 | | r[6] = (sword32)t6; |
2136 | | r[7] = (sword32)t7; |
2137 | | r[8] = (sword32)t8; |
2138 | | r[9] = (sword32)t9; |
2139 | | r[10] = (sword32)t10; |
2140 | | r[11] = (sword32)t11; |
2141 | | r[12] = (sword32)t12; |
2142 | | r[13] = (sword32)t13; |
2143 | | r[14] = (sword32)t14; |
2144 | | r[15] = (sword32)t15; |
2145 | | } |
2146 | | |
2147 | | /* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1) |
2148 | | * |
2149 | | * r [in] Field element to hold result. |
2150 | | * a [in] Field element to square. |
2151 | | */ |
2152 | | void fe448_sqr(sword32* r, const sword32* a) |
2153 | | { |
2154 | | sword32 r0[16]; |
2155 | | sword32 r1[16]; |
2156 | | sword32* a1 = r1; |
2157 | | sword32 r2[16]; |
2158 | | a1[0] = a[0] + a[8]; |
2159 | | a1[1] = a[1] + a[9]; |
2160 | | a1[2] = a[2] + a[10]; |
2161 | | a1[3] = a[3] + a[11]; |
2162 | | a1[4] = a[4] + a[12]; |
2163 | | a1[5] = a[5] + a[13]; |
2164 | | a1[6] = a[6] + a[14]; |
2165 | | a1[7] = a[7] + a[15]; |
2166 | | fe448_sqr_8(r2, a + 8); |
2167 | | fe448_sqr_8(r0, a); |
2168 | | fe448_sqr_8(r1, a1); |
2169 | | r[ 0] = r0[ 0] + r2[ 0] + r1[ 8] - r0[ 8]; |
2170 | | r[ 1] = r0[ 1] + r2[ 1] + r1[ 9] - r0[ 9]; |
2171 | | r[ 2] = r0[ 2] + r2[ 2] + r1[10] - r0[10]; |
2172 | | r[ 3] = r0[ 3] + r2[ 3] + r1[11] - r0[11]; |
2173 | | r[ 4] = r0[ 4] + r2[ 4] + r1[12] - r0[12]; |
2174 | | r[ 5] = r0[ 5] + r2[ 5] + r1[13] - r0[13]; |
2175 | | r[ 6] = r0[ 6] + r2[ 6] + r1[14] - r0[14]; |
2176 | | r[ 7] = r0[ 7] + r2[ 7] + r1[15] - r0[15]; |
2177 | | r[ 8] = r2[ 8] + r1[ 0] - r0[ 0] + r1[ 8]; |
2178 | | r[ 9] = r2[ 9] + r1[ 1] - r0[ 1] + r1[ 9]; |
2179 | | r[10] = r2[10] + r1[ 2] - r0[ 2] + r1[10]; |
2180 | | r[11] = r2[11] + r1[ 3] - r0[ 3] + r1[11]; |
2181 | | r[12] = r2[12] + r1[ 4] - r0[ 4] + r1[12]; |
2182 | | r[13] = r2[13] + r1[ 5] - r0[ 5] + r1[13]; |
2183 | | r[14] = r2[14] + r1[ 6] - r0[ 6] + r1[14]; |
2184 | | r[15] = r2[15] + r1[ 7] - r0[ 7] + r1[15]; |
2185 | | } |
2186 | | |
2187 | | /* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1 |
2188 | | * Constant time implementation - using Fermat's little theorem: |
2189 | | * a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a |
2190 | | * For curve448: p - 2 = 2^448 - 2^224 - 3 |
2191 | | * |
2192 | | * r [in] Field element to hold result. |
2193 | | * a [in] Field element to invert. |
2194 | | */ |
2195 | | void fe448_invert(sword32* r, const sword32* a) |
2196 | | { |
2197 | | sword32 t1[16]; |
2198 | | sword32 t2[16]; |
2199 | | sword32 t3[16]; |
2200 | | sword32 t4[16]; |
2201 | | int i; |
2202 | | |
2203 | | fe448_sqr(t1, a); |
2204 | | /* t1 = 2 */ |
2205 | | fe448_mul(t1, t1, a); |
2206 | | /* t1 = 3 */ |
2207 | | fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2); |
2208 | | /* t2 = c */ |
2209 | | fe448_mul(t3, t2, a); |
2210 | | /* t3 = d */ |
2211 | | fe448_mul(t1, t2, t1); |
2212 | | /* t1 = f */ |
2213 | | fe448_sqr(t2, t1); |
2214 | | /* t2 = 1e */ |
2215 | | fe448_mul(t4, t2, a); |
2216 | | /* t4 = 1f */ |
2217 | | fe448_sqr(t2, t4); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2); |
2218 | | /* t2 = 3e0 */ |
2219 | | fe448_mul(t1, t2, t4); |
2220 | | /* t1 = 3ff */ |
2221 | | fe448_sqr(t2, t1); for (i = 1; i < 10; ++i) fe448_sqr(t2, t2); |
2222 | | /* t2 = ffc00 */ |
2223 | | fe448_mul(t1, t2, t1); |
2224 | | /* t1 = fffff */ |
2225 | | fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2); |
2226 | | /* t2 = 1ffffe0 */ |
2227 | | fe448_mul(t1, t2, t4); |
2228 | | /* t1 = 1ffffff */ |
2229 | | fe448_sqr(t2, t1); for (i = 1; i < 25; ++i) fe448_sqr(t2, t2); |
2230 | | /* t2 = 3fffffe000000 */ |
2231 | | fe448_mul(t1, t2, t1); |
2232 | | /* t1 = 3ffffffffffff */ |
2233 | | fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2); |
2234 | | /* t2 = 7fffffffffffe0 */ |
2235 | | fe448_mul(t1, t2, t4); |
2236 | | /* t1 = 7fffffffffffff */ |
2237 | | fe448_sqr(t2, t1); for (i = 1; i < 55; ++i) fe448_sqr(t2, t2); |
2238 | | /* t2 = 3fffffffffffff80000000000000 */ |
2239 | | fe448_mul(t1, t2, t1); |
2240 | | /* t1 = 3fffffffffffffffffffffffffff */ |
2241 | | fe448_sqr(t2, t1); for (i = 1; i < 110; ++i) fe448_sqr(t2, t2); |
2242 | | /* t2 = fffffffffffffffffffffffffffc000000000000000000000000000 */ |
2243 | | fe448_mul(t1, t2, t1); |
2244 | | /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff */ |
2245 | | fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2); |
2246 | | /* t2 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff0 */ |
2247 | | fe448_mul(t3, t3, t2); |
2248 | | /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffd */ |
2249 | | fe448_mul(t1, t3, a); |
2250 | | /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */ |
2251 | | fe448_sqr(t1, t1); for (i = 1; i < 224; ++i) fe448_sqr(t1, t1); |
2252 | | /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000000000000000000000000000000000 */ |
2253 | | fe448_mul(r, t3, t1); |
2254 | | /* r = fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd */ |
2255 | | } |
2256 | | |
2257 | | /* Scalar multiply the point by a number. r = n.a |
2258 | | * Uses Montgomery ladder and only requires the x-ordinate. |
2259 | | * |
2260 | | * r [in] Field element to hold result. |
2261 | | * n [in] Scalar as an array of bytes. |
2262 | | * a [in] Point to multiply - x-ordinate only. |
2263 | | */ |
2264 | | int curve448(byte* r, const byte* n, const byte* a) |
2265 | | { |
2266 | | sword32 x1[16]; |
2267 | | sword32 x2[16]; |
2268 | | sword32 z2[16]; |
2269 | | sword32 x3[16]; |
2270 | | sword32 z3[16]; |
2271 | | sword32 t0[16]; |
2272 | | sword32 t1[16]; |
2273 | | int i; |
2274 | | unsigned int swap; |
2275 | | |
2276 | | fe448_from_bytes(x1, a); |
2277 | | fe448_1(x2); |
2278 | | fe448_0(z2); |
2279 | | fe448_copy(x3, x1); |
2280 | | fe448_1(z3); |
2281 | | |
2282 | | swap = 0; |
2283 | | for (i = 447; i >= 0; --i) { |
2284 | | unsigned int b = (n[i >> 3] >> (i & 7)) & 1; |
2285 | | swap ^= b; |
2286 | | fe448_cswap(x2, x3, (int)swap); |
2287 | | fe448_cswap(z2, z3, (int)swap); |
2288 | | swap = b; |
2289 | | |
2290 | | /* Montgomery Ladder - double and add */ |
2291 | | fe448_add(t0, x2, z2); |
2292 | | fe448_reduce(t0); |
2293 | | fe448_add(t1, x3, z3); |
2294 | | fe448_reduce(t1); |
2295 | | fe448_sub(x2, x2, z2); |
2296 | | fe448_sub(x3, x3, z3); |
2297 | | fe448_mul(t1, t1, x2); |
2298 | | fe448_mul(z3, x3, t0); |
2299 | | fe448_sqr(t0, t0); |
2300 | | fe448_sqr(x2, x2); |
2301 | | fe448_add(x3, z3, t1); |
2302 | | fe448_reduce(x3); |
2303 | | fe448_sqr(x3, x3); |
2304 | | fe448_sub(z3, z3, t1); |
2305 | | fe448_sqr(z3, z3); |
2306 | | fe448_mul(z3, z3, x1); |
2307 | | fe448_sub(t1, t0, x2); |
2308 | | fe448_mul(x2, t0, x2); |
2309 | | fe448_mul39081(z2, t1); |
2310 | | fe448_add(z2, t0, z2); |
2311 | | fe448_mul(z2, z2, t1); |
2312 | | } |
2313 | | /* Last two bits are 0 - no final swap check required. */ |
2314 | | |
2315 | | fe448_invert(z2, z2); |
2316 | | fe448_mul(x2, x2, z2); |
2317 | | fe448_to_bytes(r, x2); |
2318 | | |
2319 | | return 0; |
2320 | | } |
2321 | | |
2322 | | #ifdef HAVE_ED448 |
2323 | | /* Check whether field element is not 0. |
2324 | | * Must convert to a normalized form before checking. |
2325 | | * |
2326 | | * a [in] Field element. |
2327 | | * returns 0 when zero, and any other value otherwise. |
2328 | | */ |
2329 | | int fe448_isnonzero(const sword32* a) |
2330 | | { |
2331 | | byte b[56]; |
2332 | | int i; |
2333 | | byte c = 0; |
2334 | | fe448_to_bytes(b, a); |
2335 | | for (i = 0; i < 56; i++) |
2336 | | c |= b[i]; |
2337 | | return c; |
2338 | | } |
2339 | | |
2340 | | /* Check whether field element is negative. |
2341 | | * Must convert to a normalized form before checking. |
2342 | | * |
2343 | | * a [in] Field element. |
2344 | | * returns 1 when negative, and 0 otherwise. |
2345 | | */ |
2346 | | int fe448_isnegative(const sword32* a) |
2347 | | { |
2348 | | byte b[56]; |
2349 | | fe448_to_bytes(b, a); |
2350 | | return b[0] & 1; |
2351 | | } |
2352 | | |
2353 | | /* Negates the field element. r = -a |
2354 | | * |
2355 | | * r [in] Field element to hold result. |
2356 | | * a [in] Field element. |
2357 | | */ |
2358 | | void fe448_neg(sword32* r, const sword32* a) |
2359 | | { |
2360 | | r[0] = -a[0]; |
2361 | | r[1] = -a[1]; |
2362 | | r[2] = -a[2]; |
2363 | | r[3] = -a[3]; |
2364 | | r[4] = -a[4]; |
2365 | | r[5] = -a[5]; |
2366 | | r[6] = -a[6]; |
2367 | | r[7] = -a[7]; |
2368 | | r[8] = -a[8]; |
2369 | | r[9] = -a[9]; |
2370 | | r[10] = -a[10]; |
2371 | | r[11] = -a[11]; |
2372 | | r[12] = -a[12]; |
2373 | | r[13] = -a[13]; |
2374 | | r[14] = -a[14]; |
2375 | | r[15] = -a[15]; |
2376 | | } |
2377 | | |
2378 | | /* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1 |
2379 | | * Used for calculating y-ordinate from x-ordinate for Ed448. |
2380 | | * |
2381 | | * r [in] Field element to hold result. |
2382 | | * a [in] Field element to exponentiate. |
2383 | | */ |
2384 | | void fe448_pow_2_446_222_1(sword32* r, const sword32* a) |
2385 | | { |
2386 | | sword32 t1[16]; |
2387 | | sword32 t2[16]; |
2388 | | sword32 t3[16]; |
2389 | | sword32 t4[16]; |
2390 | | sword32 t5[16]; |
2391 | | int i; |
2392 | | |
2393 | | fe448_sqr(t3, a); |
2394 | | /* t3 = 2 */ |
2395 | | fe448_mul(t1, t3, a); |
2396 | | /* t1 = 3 */ |
2397 | | fe448_sqr(t5, t1); |
2398 | | /* t5 = 6 */ |
2399 | | fe448_mul(t5, t5, a); |
2400 | | /* t5 = 7 */ |
2401 | | fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2); |
2402 | | /* t2 = c */ |
2403 | | fe448_mul(t3, t2, t3); |
2404 | | /* t3 = e */ |
2405 | | fe448_mul(t1, t2, t1); |
2406 | | /* t1 = f */ |
2407 | | fe448_sqr(t2, t1); for (i = 1; i < 3; ++i) fe448_sqr(t2, t2); |
2408 | | /* t2 = 78 */ |
2409 | | fe448_mul(t5, t2, t5); |
2410 | | /* t5 = 7f */ |
2411 | | fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2); |
2412 | | /* t2 = f0 */ |
2413 | | fe448_mul(t1, t2, t1); |
2414 | | /* t1 = ff */ |
2415 | | fe448_mul(t3, t3, t2); |
2416 | | /* t3 = fe */ |
2417 | | fe448_sqr(t2, t1); for (i = 1; i < 7; ++i) fe448_sqr(t2, t2); |
2418 | | /* t2 = 7f80 */ |
2419 | | fe448_mul(t5, t2, t5); |
2420 | | /* t5 = 7fff */ |
2421 | | fe448_sqr(t2, t1); for (i = 1; i < 8; ++i) fe448_sqr(t2, t2); |
2422 | | /* t2 = ff00 */ |
2423 | | fe448_mul(t1, t2, t1); |
2424 | | /* t1 = ffff */ |
2425 | | fe448_mul(t3, t3, t2); |
2426 | | /* t3 = fffe */ |
2427 | | fe448_sqr(t2, t5); for (i = 1; i < 15; ++i) fe448_sqr(t2, t2); |
2428 | | /* t2 = 3fff8000 */ |
2429 | | fe448_mul(t5, t2, t5); |
2430 | | /* t5 = 3fffffff */ |
2431 | | fe448_sqr(t2, t1); for (i = 1; i < 16; ++i) fe448_sqr(t2, t2); |
2432 | | /* t2 = ffff0000 */ |
2433 | | fe448_mul(t1, t2, t1); |
2434 | | /* t1 = ffffffff */ |
2435 | | fe448_mul(t3, t3, t2); |
2436 | | /* t3 = fffffffe */ |
2437 | | fe448_sqr(t2, t1); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2); |
2438 | | /* t2 = ffffffff00000000 */ |
2439 | | fe448_mul(t2, t2, t1); |
2440 | | /* t2 = ffffffffffffffff */ |
2441 | | fe448_sqr(t1, t2); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1); |
2442 | | /* t1 = ffffffffffffffff0000000000000000 */ |
2443 | | fe448_mul(t1, t1, t2); |
2444 | | /* t1 = ffffffffffffffffffffffffffffffff */ |
2445 | | fe448_sqr(t1, t1); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1); |
2446 | | /* t1 = ffffffffffffffffffffffffffffffff0000000000000000 */ |
2447 | | fe448_mul(t4, t1, t2); |
2448 | | /* t4 = ffffffffffffffffffffffffffffffffffffffffffffffff */ |
2449 | | fe448_sqr(t2, t4); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2); |
2450 | | /* t2 = ffffffffffffffffffffffffffffffffffffffffffffffff00000000 */ |
2451 | | fe448_mul(t3, t3, t2); |
2452 | | /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */ |
2453 | | fe448_sqr(t1, t3); for (i = 1; i < 192; ++i) fe448_sqr(t1, t1); |
2454 | | /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe000000000000000000000000000000000000000000000000 */ |
2455 | | fe448_mul(t1, t1, t4); |
2456 | | /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffff */ |
2457 | | fe448_sqr(t1, t1); for (i = 1; i < 30; ++i) fe448_sqr(t1, t1); |
2458 | | /* t1 = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffc0000000 */ |
2459 | | fe448_mul(r, t5, t1); |
2460 | | /* r = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff */ |
2461 | | } |
2462 | | |
2463 | | /* Constant time, conditional move of b into a. |
2464 | | * a is not changed if the condition is 0. |
2465 | | * |
2466 | | * a A field element. |
2467 | | * b A field element. |
2468 | | * c If 1 then copy and if 0 then don't copy. |
2469 | | */ |
2470 | | void fe448_cmov(sword32* a, const sword32* b, int c) |
2471 | | { |
2472 | | sword32 m = -(sword32)c; |
2473 | | sword32 t0 = m & (a[0] ^ b[0]); |
2474 | | sword32 t1 = m & (a[1] ^ b[1]); |
2475 | | sword32 t2 = m & (a[2] ^ b[2]); |
2476 | | sword32 t3 = m & (a[3] ^ b[3]); |
2477 | | sword32 t4 = m & (a[4] ^ b[4]); |
2478 | | sword32 t5 = m & (a[5] ^ b[5]); |
2479 | | sword32 t6 = m & (a[6] ^ b[6]); |
2480 | | sword32 t7 = m & (a[7] ^ b[7]); |
2481 | | sword32 t8 = m & (a[8] ^ b[8]); |
2482 | | sword32 t9 = m & (a[9] ^ b[9]); |
2483 | | sword32 t10 = m & (a[10] ^ b[10]); |
2484 | | sword32 t11 = m & (a[11] ^ b[11]); |
2485 | | sword32 t12 = m & (a[12] ^ b[12]); |
2486 | | sword32 t13 = m & (a[13] ^ b[13]); |
2487 | | sword32 t14 = m & (a[14] ^ b[14]); |
2488 | | sword32 t15 = m & (a[15] ^ b[15]); |
2489 | | |
2490 | | a[0] ^= t0; |
2491 | | a[1] ^= t1; |
2492 | | a[2] ^= t2; |
2493 | | a[3] ^= t3; |
2494 | | a[4] ^= t4; |
2495 | | a[5] ^= t5; |
2496 | | a[6] ^= t6; |
2497 | | a[7] ^= t7; |
2498 | | a[8] ^= t8; |
2499 | | a[9] ^= t9; |
2500 | | a[10] ^= t10; |
2501 | | a[11] ^= t11; |
2502 | | a[12] ^= t12; |
2503 | | a[13] ^= t13; |
2504 | | a[14] ^= t14; |
2505 | | a[15] ^= t15; |
2506 | | } |
2507 | | |
2508 | | #endif /* HAVE_ED448 */ |
2509 | | #endif |
2510 | | |
2511 | | #endif /* HAVE_CURVE448 || HAVE_ED448 */ |