Coverage Report

Created: 2024-06-28 06:19

/src/wolfssl/wolfcrypt/src/fe_x25519_128.i
Line
Count
Source (jump to first uncovered line)
1
/* fe_x25519_128.i
2
 *
3
 * Copyright (C) 2006-2023 wolfSSL Inc.
4
 *
5
 * This file is part of wolfSSL.
6
 *
7
 * wolfSSL is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 2 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * wolfSSL is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20
 */
21
22
/* Generated using (from wolfssl):
23
 *   cd ../scripts
24
 *   ruby ./x25519/fe_x25519_128_gen.rb > ../wolfssl/wolfcrypt/src/fe_x25519_128.i
25
 */
26
27
/* Initialization hook for the field element code.
 * This implementation performs no set-up work, so the body is empty.
 */
void fe_init(void)
{
    /* Nothing to do. */
}
30
31
/* Convert a number represented as an array of bytes to an array of words with
32
 * 51-bits of data in each word.
33
 *
34
 * in   An array of bytes.
35
 * out  An array of words.
36
 */
37
void fe_frombytes(fe out, const unsigned char *in)
38
0
{
39
0
    out[0] = (((sword64)((in[ 0]      )       ))      )
40
0
           | (((sword64)((in[ 1]      )       )) <<  8)
41
0
           | (((sword64)((in[ 2]      )       )) << 16)
42
0
           | (((sword64)((in[ 3]      )       )) << 24)
43
0
           | (((sword64)((in[ 4]      )       )) << 32)
44
0
           | (((sword64)((in[ 5]      )       )) << 40)
45
0
           | (((sword64)((in[ 6]      ) & 0x07)) << 48);
46
0
    out[1] = (((sword64)((in[ 6] >>  3) & 0x1f))      )
47
0
           | (((sword64)((in[ 7]      )       )) <<  5)
48
0
           | (((sword64)((in[ 8]      )       )) << 13)
49
0
           | (((sword64)((in[ 9]      )       )) << 21)
50
0
           | (((sword64)((in[10]      )       )) << 29)
51
0
           | (((sword64)((in[11]      )       )) << 37)
52
0
           | (((sword64)((in[12]      ) & 0x3f)) << 45);
53
0
    out[2] = (((sword64)((in[12] >>  6) & 0x03))      )
54
0
           | (((sword64)((in[13]      )       )) <<  2)
55
0
           | (((sword64)((in[14]      )       )) << 10)
56
0
           | (((sword64)((in[15]      )       )) << 18)
57
0
           | (((sword64)((in[16]      )       )) << 26)
58
0
           | (((sword64)((in[17]      )       )) << 34)
59
0
           | (((sword64)((in[18]      )       )) << 42)
60
0
           | (((sword64)((in[19]      ) & 0x01)) << 50);
61
0
    out[3] = (((sword64)((in[19] >>  1) & 0x7f))      )
62
0
           | (((sword64)((in[20]      )       )) <<  7)
63
0
           | (((sword64)((in[21]      )       )) << 15)
64
0
           | (((sword64)((in[22]      )       )) << 23)
65
0
           | (((sword64)((in[23]      )       )) << 31)
66
0
           | (((sword64)((in[24]      )       )) << 39)
67
0
           | (((sword64)((in[25]      ) & 0x0f)) << 47);
68
0
    out[4] = (((sword64)((in[25] >>  4) & 0x0f))      )
69
0
           | (((sword64)((in[26]      )       )) <<  4)
70
0
           | (((sword64)((in[27]      )       )) << 12)
71
0
           | (((sword64)((in[28]      )       )) << 20)
72
0
           | (((sword64)((in[29]      )       )) << 28)
73
0
           | (((sword64)((in[30]      )       )) << 36)
74
0
           | (((sword64)((in[31]      ) & 0x7f)) << 44);
75
0
}
76
77
/* Convert a number represented as an array of words to an array of bytes.
78
 * The array of words is normalized to an array of 51-bit data words and if
79
 * greater than the mod, modulo reduced by the prime 2^255 - 1.
80
 *
81
 * n    An array of words.
82
 * out  An array of bytes.
83
 */
84
void fe_tobytes(unsigned char *out, const fe n)
85
0
{
86
0
    fe      in;
87
0
    sword64 c;
88
89
0
    in[0] = n[0];
90
0
    in[1] = n[1];
91
0
    in[2] = n[2];
92
0
    in[3] = n[3];
93
0
    in[4] = n[4];
94
95
    /* Normalize to 51-bits of data per word. */
96
0
    in[0] += (in[4] >> 51) * 19; in[4] &= 0x7ffffffffffff;
97
98
0
    in[1] += in[0] >> 51; in[0] &= 0x7ffffffffffff;
99
0
    in[2] += in[1] >> 51; in[1] &= 0x7ffffffffffff;
100
0
    in[3] += in[2] >> 51; in[2] &= 0x7ffffffffffff;
101
0
    in[4] += in[3] >> 51; in[3] &= 0x7ffffffffffff;
102
0
    in[0] += (in[4] >> 51) * 19;
103
0
    in[4] &= 0x7ffffffffffff;
104
105
0
    c = (in[0] + 19) >> 51;
106
0
    c = (in[1] + c) >> 51;
107
0
    c = (in[2] + c) >> 51;
108
0
    c = (in[3] + c) >> 51;
109
0
    c = (in[4] + c) >> 51;
110
0
    in[0] += c * 19;
111
0
    in[1] += in[0] >> 51; in[0] &= 0x7ffffffffffff;
112
0
    in[2] += in[1] >> 51; in[1] &= 0x7ffffffffffff;
113
0
    in[3] += in[2] >> 51; in[2] &= 0x7ffffffffffff;
114
0
    in[4] += in[3] >> 51; in[3] &= 0x7ffffffffffff;
115
0
    in[4] &= 0x7ffffffffffff;
116
117
0
    out[ 0] = (((byte)((in[0]      )       ))      );
118
0
    out[ 1] = (((byte)((in[0] >>  8)       ))      );
119
0
    out[ 2] = (((byte)((in[0] >> 16)       ))      );
120
0
    out[ 3] = (((byte)((in[0] >> 24)       ))      );
121
0
    out[ 4] = (((byte)((in[0] >> 32)       ))      );
122
0
    out[ 5] = (((byte)((in[0] >> 40)       ))      );
123
0
    out[ 6] = (((byte)((in[0] >> 48) & 0x07))      )
124
0
            | (((byte)((in[1]      ) & 0x1f)) <<  3);
125
0
    out[ 7] = (((byte)((in[1] >>  5)       ))      );
126
0
    out[ 8] = (((byte)((in[1] >> 13)       ))      );
127
0
    out[ 9] = (((byte)((in[1] >> 21)       ))      );
128
0
    out[10] = (((byte)((in[1] >> 29)       ))      );
129
0
    out[11] = (((byte)((in[1] >> 37)       ))      );
130
0
    out[12] = (((byte)((in[1] >> 45) & 0x3f))      )
131
0
            | (((byte)((in[2]      ) & 0x03)) <<  6);
132
0
    out[13] = (((byte)((in[2] >>  2)       ))      );
133
0
    out[14] = (((byte)((in[2] >> 10)       ))      );
134
0
    out[15] = (((byte)((in[2] >> 18)       ))      );
135
0
    out[16] = (((byte)((in[2] >> 26)       ))      );
136
0
    out[17] = (((byte)((in[2] >> 34)       ))      );
137
0
    out[18] = (((byte)((in[2] >> 42)       ))      );
138
0
    out[19] = (((byte)((in[2] >> 50) & 0x01))      )
139
0
            | (((byte)((in[3]      ) & 0x7f)) <<  1);
140
0
    out[20] = (((byte)((in[3] >>  7)       ))      );
141
0
    out[21] = (((byte)((in[3] >> 15)       ))      );
142
0
    out[22] = (((byte)((in[3] >> 23)       ))      );
143
0
    out[23] = (((byte)((in[3] >> 31)       ))      );
144
0
    out[24] = (((byte)((in[3] >> 39)       ))      );
145
0
    out[25] = (((byte)((in[3] >> 47) & 0x0f))      )
146
0
            | (((byte)((in[4]      ) & 0x0f)) <<  4);
147
0
    out[26] = (((byte)((in[4] >>  4)       ))      );
148
0
    out[27] = (((byte)((in[4] >> 12)       ))      );
149
0
    out[28] = (((byte)((in[4] >> 20)       ))      );
150
0
    out[29] = (((byte)((in[4] >> 28)       ))      );
151
0
    out[30] = (((byte)((in[4] >> 36)       ))      );
152
0
    out[31] = (((byte)((in[4] >> 44) & 0x7f))      );
153
0
}
154
155
/* Set the field element to 1.
156
 *
157
 * n  The field element number.
158
 */
159
void fe_1(fe n)
160
0
{
161
0
    n[0] = 0x0000000000001;
162
0
    n[1] = 0x0000000000000;
163
0
    n[2] = 0x0000000000000;
164
0
    n[3] = 0x0000000000000;
165
0
    n[4] = 0x0000000000000;
166
0
}
167
168
/* Set the field element to 0.
169
 *
170
 * n  The field element number.
171
 */
172
void fe_0(fe n)
173
0
{
174
0
    n[0] = 0x0000000000000;
175
0
    n[1] = 0x0000000000000;
176
0
    n[2] = 0x0000000000000;
177
0
    n[3] = 0x0000000000000;
178
0
    n[4] = 0x0000000000000;
179
0
}
180
181
/* Copy field element a into field element r.
182
 *
183
 * r  Field element to copy into.
184
 * a  Field element to copy.
185
 */
186
void fe_copy(fe r, const fe a)
187
0
{
188
0
    r[0] = a[0];
189
0
    r[1] = a[1];
190
0
    r[2] = a[2];
191
0
    r[3] = a[3];
192
0
    r[4] = a[4];
193
0
}
194
195
/* Constant time, conditional swap of field elements a and b.
196
 *
197
 * f  A field element.
198
 * g  A field element.
199
 * b  If 1 then swap and if 0 then don't swap.
200
 */
201
void fe_cswap(fe f, fe g, int b)
202
0
{
203
0
    sword64 m = b;
204
0
    sword64 t0, t1, t2, t3, t4;
205
206
    /* Convert conditional into mask. */
207
0
    m = -m;
208
0
    t0 = m & (f[0] ^ g[0]);
209
0
    t1 = m & (f[1] ^ g[1]);
210
0
    t2 = m & (f[2] ^ g[2]);
211
0
    t3 = m & (f[3] ^ g[3]);
212
0
    t4 = m & (f[4] ^ g[4]);
213
214
0
    f[0] ^= t0;
215
0
    f[1] ^= t1;
216
0
    f[2] ^= t2;
217
0
    f[3] ^= t3;
218
0
    f[4] ^= t4;
219
220
0
    g[0] ^= t0;
221
0
    g[1] ^= t1;
222
0
    g[2] ^= t2;
223
0
    g[3] ^= t3;
224
0
    g[4] ^= t4;
225
0
}
226
227
/* Subtract b from a into r. (r = a - b)
228
 *
229
 * r  A field element.
230
 * a  A field element.
231
 * b  A field element.
232
 */
233
void fe_sub(fe r, const fe a, const fe b)
234
0
{
235
0
    r[0] = a[0] - b[0];
236
0
    r[1] = a[1] - b[1];
237
0
    r[2] = a[2] - b[2];
238
0
    r[3] = a[3] - b[3];
239
0
    r[4] = a[4] - b[4];
240
0
}
241
242
/* Add b to a into r. (r = a + b)
243
 *
244
 * r  A field element.
245
 * a  A field element.
246
 * b  A field element.
247
 */
248
void fe_add(fe r, const fe a, const fe b)
249
0
{
250
0
    r[0] = a[0] + b[0];
251
0
    r[1] = a[1] + b[1];
252
0
    r[2] = a[2] + b[2];
253
0
    r[3] = a[3] + b[3];
254
0
    r[4] = a[4] + b[4];
255
0
}
256
257
/* Multiply a and b into r. (r = a * b)
258
 *
259
 * r  A field element.
260
 * a  A field element.
261
 * b  A field element.
262
 */
263
void fe_mul(fe r, const fe a, const fe b)
264
0
{
265
0
    const __int128_t k19 = 19;
266
0
    __int128_t t0 = ((__int128_t)a[0]) * b[0];
267
0
    __int128_t t1 = ((__int128_t)a[0]) * b[1]
268
0
                  + ((__int128_t)a[1]) * b[0];
269
0
    __int128_t t2 = ((__int128_t)a[0]) * b[2]
270
0
                  + ((__int128_t)a[1]) * b[1]
271
0
                  + ((__int128_t)a[2]) * b[0];
272
0
    __int128_t t3 = ((__int128_t)a[0]) * b[3]
273
0
                  + ((__int128_t)a[1]) * b[2]
274
0
                  + ((__int128_t)a[2]) * b[1]
275
0
                  + ((__int128_t)a[3]) * b[0];
276
0
    __int128_t t4 = ((__int128_t)a[0]) * b[4]
277
0
                  + ((__int128_t)a[1]) * b[3]
278
0
                  + ((__int128_t)a[2]) * b[2]
279
0
                  + ((__int128_t)a[3]) * b[1]
280
0
                  + ((__int128_t)a[4]) * b[0];
281
0
    __int128_t t5 = ((__int128_t)a[1]) * b[4]
282
0
                  + ((__int128_t)a[2]) * b[3]
283
0
                  + ((__int128_t)a[3]) * b[2]
284
0
                  + ((__int128_t)a[4]) * b[1];
285
0
    __int128_t t6 = ((__int128_t)a[2]) * b[4]
286
0
                  + ((__int128_t)a[3]) * b[3]
287
0
                  + ((__int128_t)a[4]) * b[2];
288
0
    __int128_t t7 = ((__int128_t)a[3]) * b[4]
289
0
                  + ((__int128_t)a[4]) * b[3];
290
0
    __int128_t t8 = ((__int128_t)a[4]) * b[4];
291
292
    /* Modulo reduce double long word. */
293
0
    t0 += t5 * k19;
294
0
    t1 += t6 * k19;
295
0
    t2 += t7 * k19;
296
0
    t3 += t8 * k19;
297
298
    /* Normalize to 51-bits of data per word. */
299
0
    t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
300
301
0
    t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
302
0
    t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
303
0
    t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
304
0
    t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
305
0
    r[0] += (sword64)((t4 >> 51) * k19);
306
0
    r[4] = t4 & 0x7ffffffffffff;
307
0
}
308
309
/* Square a and put result in r. (r = a * a)
310
 *
311
 * r  A field element.
312
 * a  A field element.
313
 * b  A field element.
314
 */
315
void fe_sq(fe r, const fe a)
316
0
{
317
0
    const __int128_t k19 = 19;
318
0
    const __int128_t k2 = 2;
319
0
    __int128_t t0 = ((__int128_t)a[0]) * a[0];
320
0
    __int128_t t1 = ((__int128_t)a[0]) * a[1] * k2;
321
0
    __int128_t t2 = ((__int128_t)a[0]) * a[2] * k2
322
0
                  + ((__int128_t)a[1]) * a[1];
323
0
    __int128_t t3 = ((__int128_t)a[0]) * a[3] * k2
324
0
                  + ((__int128_t)a[1]) * a[2] * k2;
325
0
    __int128_t t4 = ((__int128_t)a[0]) * a[4] * k2
326
0
                  + ((__int128_t)a[1]) * a[3] * k2
327
0
                  + ((__int128_t)a[2]) * a[2];
328
0
    __int128_t t5 = ((__int128_t)a[1]) * a[4] * k2
329
0
                  + ((__int128_t)a[2]) * a[3] * k2;
330
0
    __int128_t t6 = ((__int128_t)a[2]) * a[4] * k2
331
0
                  + ((__int128_t)a[3]) * a[3];
332
0
    __int128_t t7 = ((__int128_t)a[3]) * a[4] * k2;
333
0
    __int128_t t8 = ((__int128_t)a[4]) * a[4];
334
335
    /* Modulo reduce double long word. */
336
0
    t0 += t5 * k19;
337
0
    t1 += t6 * k19;
338
0
    t2 += t7 * k19;
339
0
    t3 += t8 * k19;
340
341
    /* Normalize to 51-bits of data per word. */
342
0
    t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
343
344
0
    t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
345
0
    t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
346
0
    t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
347
0
    t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
348
0
    r[0] += (sword64)((t4 >> 51) * k19);
349
0
    r[4] = t4 & 0x7ffffffffffff;
350
0
}
351
352
/* Multiply a by 121666 and put result in r. (r = 121666 * a)
353
 *
354
 * r  A field element.
355
 * a  A field element.
356
 * b  A field element.
357
 */
358
void fe_mul121666(fe r, fe a)
359
0
{
360
0
    const __int128_t k19 = 19;
361
0
    const __int128_t k121666 = 121666;
362
0
    __int128_t t0 = ((__int128_t)a[0]) * k121666;
363
0
    __int128_t t1 = ((__int128_t)a[1]) * k121666;
364
0
    __int128_t t2 = ((__int128_t)a[2]) * k121666;
365
0
    __int128_t t3 = ((__int128_t)a[3]) * k121666;
366
0
    __int128_t t4 = ((__int128_t)a[4]) * k121666;
367
368
    /* Normalize to 51-bits of data per word. */
369
0
    t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
370
371
0
    t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
372
0
    t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
373
0
    t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
374
0
    t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
375
0
    r[0] += (sword64)((t4 >> 51) * k19);
376
0
    r[4] = t4 & 0x7ffffffffffff;
377
0
}
378
379
/* Find the inverse of a modulo 2^255 - 1 and put result in r.
380
 * (r * a) mod (2^255 - 1) = 1
381
 * Implementation is constant time.
382
 *
383
 * r  A field element.
384
 * a  A field element.
385
 */
386
void fe_invert(fe r, const fe a)
387
0
{
388
0
    fe  t0, t1, t2, t3;
389
0
    int i;
390
391
    /* a ^ (2^255 - 21) */
392
0
    fe_sq(t0,  a); for (i = 1; i <   1; ++i) fe_sq(t0, t0);
393
0
    fe_sq(t1, t0); for (i = 1; i <   2; ++i) fe_sq(t1, t1); fe_mul(t1,  a, t1);
394
0
    fe_mul(t0, t0, t1);
395
0
    fe_sq(t2, t0); for (i = 1; i <   1; ++i) fe_sq(t2, t2); fe_mul(t1, t1, t2);
396
0
    fe_sq(t2, t1); for (i = 1; i <   5; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
397
0
    fe_sq(t2, t1); for (i = 1; i <  10; ++i) fe_sq(t2, t2); fe_mul(t2, t2, t1);
398
0
    fe_sq(t3, t2); for (i = 1; i <  20; ++i) fe_sq(t3, t3); fe_mul(t2, t3, t2);
399
0
    fe_sq(t2, t2); for (i = 1; i <  10; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
400
0
    fe_sq(t2, t1); for (i = 1; i <  50; ++i) fe_sq(t2, t2); fe_mul(t2, t2, t1);
401
0
    fe_sq(t3, t2); for (i = 1; i < 100; ++i) fe_sq(t3, t3); fe_mul(t2, t3, t2);
402
0
    fe_sq(t2, t2); for (i = 1; i <  50; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
403
0
    fe_sq(t1, t1); for (i = 1; i <   5; ++i) fe_sq(t1, t1); fe_mul( r, t1, t0);
404
0
}
405
406
#ifndef CURVE25519_SMALL
407
/* Scalar multiply the field element a by n using Montgomery Ladder and places
408
 * result in r.
409
 *
410
 * r  A field element as an array of bytes.
411
 * n  The scalar as an array of bytes.
412
 * a  A field element as an array of bytes.
413
 */
414
int curve25519(byte* r, const byte* n, const byte* a)
415
0
{
416
0
    fe           x1, x2, z2, x3, z3;
417
0
    fe           t0, t1;
418
0
    int          pos;
419
0
    unsigned int swap;
420
0
    unsigned int b;
421
422
0
    fe_frombytes(x1, a);
423
0
    fe_1(x2);
424
0
    fe_0(z2);
425
0
    fe_copy(x3, x1);
426
0
    fe_1(z3);
427
428
0
    swap = 0;
429
0
    for (pos = 254;pos >= 0;--pos) {
430
0
        b = n[pos / 8] >> (pos & 7);
431
0
        b &= 1;
432
0
        swap ^= b;
433
0
        fe_cswap(x2, x3, (int)swap);
434
0
        fe_cswap(z2, z3, (int)swap);
435
0
        swap = b;
436
437
0
        fe_sub(t0, x3, z3);
438
0
        fe_sub(t1, x2, z2);
439
0
        fe_add(x2, x2, z2);
440
0
        fe_add(z2, x3, z3);
441
0
        fe_mul(z3, t0, x2);
442
0
        fe_mul(z2, z2, t1);
443
0
        fe_sq(t0, t1);
444
0
        fe_sq(t1, x2);
445
0
        fe_add(x3, z3, z2);
446
0
        fe_sub(z2, z3, z2);
447
0
        fe_mul(x2, t1, t0);
448
0
        fe_sub(t1, t1, t0);
449
0
        fe_sq(z2, z2);
450
0
        fe_mul121666(z3, t1);
451
0
        fe_sq(x3, x3);
452
0
        fe_add(t0, t0, z3);
453
0
        fe_mul(z3, x1, z2);
454
0
        fe_mul(z2, t1, t0);
455
0
    }
456
0
    fe_cswap(x2, x3, (int)swap);
457
0
    fe_cswap(z2, z3, (int)swap);
458
459
0
    fe_invert(z2, z2);
460
0
    fe_mul(x2, x2, z2);
461
0
    fe_tobytes(r, x2);
462
463
0
    return 0;
464
0
}
465
#endif /* !CURVE25519_SMALL */
466
467
/* The field element value 0 as an array of bytes. */
468
static const unsigned char zero[32] = {0};
469
470
/* Constant time check as to whether a is not 0.
471
 *
472
 * a  A field element.
473
 */
474
int fe_isnonzero(const fe a)
475
0
{
476
0
    unsigned char s[32];
477
0
    fe_tobytes(s, a);
478
0
    return ConstantCompare(s, zero, 32);
479
0
}
480
481
/* Checks whether a is negative.
482
 *
483
 * a  A field element.
484
 */
485
int fe_isnegative(const fe a)
486
0
{
487
0
    unsigned char s[32];
488
0
    fe_tobytes(s, a);
489
0
    return s[0] & 1;
490
0
}
491
492
/* Negates field element a and stores the result in r.
493
 *
494
 * r  A field element.
495
 * a  A field element.
496
 */
497
void fe_neg(fe r, const fe a)
498
0
{
499
0
    r[0] = -a[0];
500
0
    r[1] = -a[1];
501
0
    r[2] = -a[2];
502
0
    r[3] = -a[3];
503
0
    r[4] = -a[4];
504
0
}
505
506
/* Constant time, conditional move of b into a.
507
 * a is not changed if the condition is 0.
508
 *
509
 * f  A field element.
510
 * g  A field element.
511
 * b  If 1 then copy and if 0 then don't copy.
512
 */
513
void fe_cmov(fe f, const fe g, int b)
514
0
{
515
0
    sword64 m = b;
516
0
    sword64 t0, t1, t2, t3, t4;
517
518
    /* Convert conditional into mask. */
519
0
    m = -m;
520
0
    t0 = m & (f[0] ^ g[0]);
521
0
    t1 = m & (f[1] ^ g[1]);
522
0
    t2 = m & (f[2] ^ g[2]);
523
0
    t3 = m & (f[3] ^ g[3]);
524
0
    t4 = m & (f[4] ^ g[4]);
525
526
0
    f[0] ^= t0;
527
0
    f[1] ^= t1;
528
0
    f[2] ^= t2;
529
0
    f[3] ^= t3;
530
0
    f[4] ^= t4;
531
0
}
532
533
/* Raise a to the power 2^252 - 3 and put result in r.
 * Uses a fixed sequence of squarings and multiplications that does not
 * depend on the value of a.
 *
 * r  Field element receiving the result. Written only by the last step.
 * a  A field element.
 */
void fe_pow22523(fe r, const fe a)
{
    fe  t0, t1, t2;
    int i;

    /* r = a ^ (2^252 - 3); exponents are noted after each step. */

    fe_sq(t0, a);                           /* 2 */
    fe_sq(t1, t0);
    fe_sq(t1, t1);                          /* 8 */
    fe_mul(t1, a, t1);                      /* 9 */
    fe_mul(t0, t0, t1);                     /* 11 */
    fe_sq(t0, t0);                          /* 22 */
    fe_mul(t0, t1, t0);                     /* 2^5 - 1 */

    fe_sq(t1, t0);
    for (i = 1; i < 5; ++i) {
        fe_sq(t1, t1);
    }
    fe_mul(t0, t1, t0);                     /* 2^10 - 1 */

    fe_sq(t1, t0);
    for (i = 1; i < 10; ++i) {
        fe_sq(t1, t1);
    }
    fe_mul(t1, t1, t0);                     /* 2^20 - 1 */

    fe_sq(t2, t1);
    for (i = 1; i < 20; ++i) {
        fe_sq(t2, t2);
    }
    fe_mul(t1, t2, t1);                     /* 2^40 - 1 */

    fe_sq(t1, t1);
    for (i = 1; i < 10; ++i) {
        fe_sq(t1, t1);
    }
    fe_mul(t0, t1, t0);                     /* 2^50 - 1 */

    fe_sq(t1, t0);
    for (i = 1; i < 50; ++i) {
        fe_sq(t1, t1);
    }
    fe_mul(t1, t1, t0);                     /* 2^100 - 1 */

    fe_sq(t2, t1);
    for (i = 1; i < 100; ++i) {
        fe_sq(t2, t2);
    }
    fe_mul(t1, t2, t1);                     /* 2^200 - 1 */

    fe_sq(t1, t1);
    for (i = 1; i < 50; ++i) {
        fe_sq(t1, t1);
    }
    fe_mul(t0, t1, t0);                     /* 2^250 - 1 */

    fe_sq(t0, t0);
    fe_sq(t0, t0);                          /* 2^252 - 4 */
    fe_mul(r, t0, a);                       /* 2^252 - 3 */
}
554
555
/* Double the square of a and put result in r. (r = 2 * a * a)
556
 *
557
 * r  A field element.
558
 * a  A field element.
559
 * b  A field element.
560
 */
561
void fe_sq2(fe r, const fe a)
562
0
{
563
0
    const __int128_t k2 = 2;
564
0
    const __int128_t k19 = 19;
565
0
    __int128_t t0 = k2 * (((__int128_t)a[0]) * a[0]);
566
0
    __int128_t t1 = k2 * (((__int128_t)a[0]) * a[1] * k2);
567
0
    __int128_t t2 = k2 * (((__int128_t)a[0]) * a[2] * k2
568
0
                  + ((__int128_t)a[1]) * a[1]);
569
0
    __int128_t t3 = k2 * (((__int128_t)a[0]) * a[3] * k2
570
0
                  + ((__int128_t)a[1]) * a[2] * k2);
571
0
    __int128_t t4 = k2 * (((__int128_t)a[0]) * a[4] * k2
572
0
                  + ((__int128_t)a[1]) * a[3] * k2
573
0
                  + ((__int128_t)a[2]) * a[2]);
574
0
    __int128_t t5 = k2 * (((__int128_t)a[1]) * a[4] * k2
575
0
                  + ((__int128_t)a[2]) * a[3] * k2);
576
0
    __int128_t t6 = k2 * (((__int128_t)a[2]) * a[4] * k2
577
0
                  + ((__int128_t)a[3]) * a[3]);
578
0
    __int128_t t7 = k2 * (((__int128_t)a[3]) * a[4] * k2);
579
0
    __int128_t t8 = k2 * (((__int128_t)a[4]) * a[4]);
580
581
    /* Modulo reduce double long word. */
582
0
    t0 += t5 * k19;
583
0
    t1 += t6 * k19;
584
0
    t2 += t7 * k19;
585
0
    t3 += t8 * k19;
586
587
    /* Normalize to 51-bits of data per word. */
588
0
    t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
589
590
0
    t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
591
0
    t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
592
0
    t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
593
0
    t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
594
0
    r[0] += (sword64)((t4 >> 51) * k19);
595
0
    r[4] = t4 & 0x7ffffffffffff;
596
0
}
597
598
/* Load 3 little endian bytes into a 64-bit word.
599
 *
600
 * in  An array of bytes.
601
 * returns a 64-bit word.
602
 */
603
word64 load_3(const unsigned char *in)
604
0
{
605
0
    word64 result;
606
607
0
    result = ((((word64)in[0])      ) |
608
0
              (((word64)in[1]) <<  8) |
609
0
              (((word64)in[2]) << 16));
610
611
0
    return result;
612
0
}
613
614
/* Load 4 little endian bytes into a 64-bit word.
615
 *
616
 * in  An array of bytes.
617
 * returns a 64-bit word.
618
 */
619
word64 load_4(const unsigned char *in)
620
0
{
621
0
    word64 result;
622
623
0
    result = ((((word64)in[0])      ) |
624
0
              (((word64)in[1]) <<  8) |
625
0
              (((word64)in[2]) << 16) |
626
0
              (((word64)in[3]) << 24));
627
628
0
    return result;
629
0
}
630