Coverage Report

Created: 2026-05-18 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/wolfssl-heapmath/wolfcrypt/src/fe_x25519_128.h
Line
Count
Source
1
/* fe_x25519_128.h
2
 *
3
 * Copyright (C) 2006-2026 wolfSSL Inc.
4
 *
5
 * This file is part of wolfSSL.
6
 *
7
 * wolfSSL is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 3 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * wolfSSL is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20
 */
21
22
/* Generated using (from wolfssl):
23
 *   cd ../scripts
24
 *   ruby ./x25519/fe_x25519_128_gen.rb > ../wolfssl/wolfcrypt/src/fe_x25519_128.h
25
 */
26
27
void fe_init(void)
28
10.4k
{
29
10.4k
}
30
31
/* Convert a number represented as an array of bytes to an array of words with
32
 * 51-bits of data in each word.
33
 *
34
 * in   An array of bytes.
35
 * out  An array of words.
36
 */
37
void fe_frombytes(fe out, const unsigned char *in)
38
9.21k
{
39
9.21k
    out[0] = (sword64)(
40
9.21k
             (((word64)((in[ 0]      )       ))      )
41
9.21k
           | (((word64)((in[ 1]      )       )) <<  8)
42
9.21k
           | (((word64)((in[ 2]      )       )) << 16)
43
9.21k
           | (((word64)((in[ 3]      )       )) << 24)
44
9.21k
           | (((word64)((in[ 4]      )       )) << 32)
45
9.21k
           | (((word64)((in[ 5]      )       )) << 40)
46
9.21k
           | (((word64)((in[ 6]      ) & 0x07)) << 48));
47
9.21k
    out[1] = (sword64)(
48
9.21k
             (((word64)((in[ 6] >>  3) & 0x1f))      )
49
9.21k
           | (((word64)((in[ 7]      )       )) <<  5)
50
9.21k
           | (((word64)((in[ 8]      )       )) << 13)
51
9.21k
           | (((word64)((in[ 9]      )       )) << 21)
52
9.21k
           | (((word64)((in[10]      )       )) << 29)
53
9.21k
           | (((word64)((in[11]      )       )) << 37)
54
9.21k
           | (((word64)((in[12]      ) & 0x3f)) << 45));
55
9.21k
    out[2] = (sword64)(
56
9.21k
             (((word64)((in[12] >>  6) & 0x03))      )
57
9.21k
           | (((word64)((in[13]      )       )) <<  2)
58
9.21k
           | (((word64)((in[14]      )       )) << 10)
59
9.21k
           | (((word64)((in[15]      )       )) << 18)
60
9.21k
           | (((word64)((in[16]      )       )) << 26)
61
9.21k
           | (((word64)((in[17]      )       )) << 34)
62
9.21k
           | (((word64)((in[18]      )       )) << 42)
63
9.21k
           | (((word64)((in[19]      ) & 0x01)) << 50));
64
9.21k
    out[3] = (sword64)(
65
9.21k
             (((word64)((in[19] >>  1) & 0x7f))      )
66
9.21k
           | (((word64)((in[20]      )       )) <<  7)
67
9.21k
           | (((word64)((in[21]      )       )) << 15)
68
9.21k
           | (((word64)((in[22]      )       )) << 23)
69
9.21k
           | (((word64)((in[23]      )       )) << 31)
70
9.21k
           | (((word64)((in[24]      )       )) << 39)
71
9.21k
           | (((word64)((in[25]      ) & 0x0f)) << 47));
72
9.21k
    out[4] = (sword64)(
73
9.21k
             (((word64)((in[25] >>  4) & 0x0f))      )
74
9.21k
           | (((word64)((in[26]      )       )) <<  4)
75
9.21k
           | (((word64)((in[27]      )       )) << 12)
76
9.21k
           | (((word64)((in[28]      )       )) << 20)
77
9.21k
           | (((word64)((in[29]      )       )) << 28)
78
9.21k
           | (((word64)((in[30]      )       )) << 36)
79
9.21k
           | (((word64)((in[31]      ) & 0x7f)) << 44));
80
9.21k
}
81
82
/* Convert a number represented as an array of words to an array of bytes.
83
 * The array of words is normalized to an array of 51-bit data words and if
84
 * not less than the modulus, reduced modulo the prime 2^255 - 19.
85
 *
86
 * n    An array of words.
87
 * out  An array of bytes.
88
 */
89
void fe_tobytes(unsigned char *out, const fe n)
90
8.72k
{
91
8.72k
    fe      in;
92
8.72k
    sword64 c;
93
94
8.72k
    in[0] = n[0];
95
8.72k
    in[1] = n[1];
96
8.72k
    in[2] = n[2];
97
8.72k
    in[3] = n[3];
98
8.72k
    in[4] = n[4];
99
100
    /* Normalize to 51-bits of data per word. */
101
8.72k
    in[0] += (in[4] >> 51) * 19; in[4] &= 0x7ffffffffffff;
102
103
8.72k
    in[1] += in[0] >> 51; in[0] &= 0x7ffffffffffff;
104
8.72k
    in[2] += in[1] >> 51; in[1] &= 0x7ffffffffffff;
105
8.72k
    in[3] += in[2] >> 51; in[2] &= 0x7ffffffffffff;
106
8.72k
    in[4] += in[3] >> 51; in[3] &= 0x7ffffffffffff;
107
8.72k
    in[0] += (in[4] >> 51) * 19;
108
8.72k
    in[4] &= 0x7ffffffffffff;
109
110
8.72k
    c = (in[0] + 19) >> 51;
111
8.72k
    c = (in[1] + c) >> 51;
112
8.72k
    c = (in[2] + c) >> 51;
113
8.72k
    c = (in[3] + c) >> 51;
114
8.72k
    c = (in[4] + c) >> 51;
115
8.72k
    in[0] += c * 19;
116
8.72k
    in[1] += in[0] >> 51; in[0] &= 0x7ffffffffffff;
117
8.72k
    in[2] += in[1] >> 51; in[1] &= 0x7ffffffffffff;
118
8.72k
    in[3] += in[2] >> 51; in[2] &= 0x7ffffffffffff;
119
8.72k
    in[4] += in[3] >> 51; in[3] &= 0x7ffffffffffff;
120
8.72k
    in[4] &= 0x7ffffffffffff;
121
122
8.72k
    out[ 0] = (((byte)((in[0]      )       ))      );
123
8.72k
    out[ 1] = (((byte)((in[0] >>  8)       ))      );
124
8.72k
    out[ 2] = (((byte)((in[0] >> 16)       ))      );
125
8.72k
    out[ 3] = (((byte)((in[0] >> 24)       ))      );
126
8.72k
    out[ 4] = (((byte)((in[0] >> 32)       ))      );
127
8.72k
    out[ 5] = (((byte)((in[0] >> 40)       ))      );
128
8.72k
    out[ 6] = (byte)((((byte)((in[0] >> 48) & 0x07))      )
129
8.72k
                   | (((byte)((in[1]      ) & 0x1f)) <<  3));
130
8.72k
    out[ 7] = (((byte)((in[1] >>  5)       ))      );
131
8.72k
    out[ 8] = (((byte)((in[1] >> 13)       ))      );
132
8.72k
    out[ 9] = (((byte)((in[1] >> 21)       ))      );
133
8.72k
    out[10] = (((byte)((in[1] >> 29)       ))      );
134
8.72k
    out[11] = (((byte)((in[1] >> 37)       ))      );
135
8.72k
    out[12] = (byte)((((byte)((in[1] >> 45) & 0x3f))      )
136
8.72k
                   | (((byte)((in[2]      ) & 0x03)) <<  6));
137
8.72k
    out[13] = (((byte)((in[2] >>  2)       ))      );
138
8.72k
    out[14] = (((byte)((in[2] >> 10)       ))      );
139
8.72k
    out[15] = (((byte)((in[2] >> 18)       ))      );
140
8.72k
    out[16] = (((byte)((in[2] >> 26)       ))      );
141
8.72k
    out[17] = (((byte)((in[2] >> 34)       ))      );
142
8.72k
    out[18] = (((byte)((in[2] >> 42)       ))      );
143
8.72k
    out[19] = (byte)((((byte)((in[2] >> 50) & 0x01))      )
144
8.72k
                   | (((byte)((in[3]      ) & 0x7f)) <<  1));
145
8.72k
    out[20] = (((byte)((in[3] >>  7)       ))      );
146
8.72k
    out[21] = (((byte)((in[3] >> 15)       ))      );
147
8.72k
    out[22] = (((byte)((in[3] >> 23)       ))      );
148
8.72k
    out[23] = (((byte)((in[3] >> 31)       ))      );
149
8.72k
    out[24] = (((byte)((in[3] >> 39)       ))      );
150
8.72k
    out[25] = (byte)((((byte)((in[3] >> 47) & 0x0f))      )
151
8.72k
                   | (((byte)((in[4]      ) & 0x0f)) <<  4));
152
8.72k
    out[26] = (((byte)((in[4] >>  4)       ))      );
153
8.72k
    out[27] = (((byte)((in[4] >> 12)       ))      );
154
8.72k
    out[28] = (((byte)((in[4] >> 20)       ))      );
155
8.72k
    out[29] = (((byte)((in[4] >> 28)       ))      );
156
8.72k
    out[30] = (((byte)((in[4] >> 36)       ))      );
157
8.72k
    out[31] = (((byte)((in[4] >> 44) & 0x7f))      );
158
8.72k
}
159
160
/* Set the field element to 1.
161
 *
162
 * n  The field element number.
163
 */
164
void fe_1(fe n)
165
122k
{
166
122k
    n[0] = 0x0000000000001;
167
122k
    n[1] = 0x0000000000000;
168
122k
    n[2] = 0x0000000000000;
169
122k
    n[3] = 0x0000000000000;
170
122k
    n[4] = 0x0000000000000;
171
122k
}
172
173
/* Set the field element to 0.
174
 *
175
 * n  The field element number.
176
 */
177
void fe_0(fe n)
178
63.6k
{
179
63.6k
    n[0] = 0x0000000000000;
180
63.6k
    n[1] = 0x0000000000000;
181
63.6k
    n[2] = 0x0000000000000;
182
63.6k
    n[3] = 0x0000000000000;
183
63.6k
    n[4] = 0x0000000000000;
184
63.6k
}
185
186
/* Copy field element a into field element r.
187
 *
188
 * r  Field element to copy into.
189
 * a  Field element to copy.
190
 */
191
void fe_copy(fe r, const fe a)
192
10.2k
{
193
10.2k
    r[0] = a[0];
194
10.2k
    r[1] = a[1];
195
10.2k
    r[2] = a[2];
196
10.2k
    r[3] = a[3];
197
10.2k
    r[4] = a[4];
198
10.2k
}
199
200
/* Constant time, conditional swap of field elements f and g.
201
 *
202
 * f  A field element.
203
 * g  A field element.
204
 * b  If 1 then swap and if 0 then don't swap.
205
 */
206
void fe_cswap(fe f, fe g, int b)
207
4.31M
{
208
4.31M
    sword64 m = b;
209
4.31M
    sword64 t0, t1, t2, t3, t4;
210
211
    /* Convert conditional into mask. */
212
4.31M
    m = -m;
213
4.31M
    t0 = m & (f[0] ^ g[0]);
214
4.31M
    t1 = m & (f[1] ^ g[1]);
215
4.31M
    t2 = m & (f[2] ^ g[2]);
216
4.31M
    t3 = m & (f[3] ^ g[3]);
217
4.31M
    t4 = m & (f[4] ^ g[4]);
218
219
4.31M
    f[0] ^= t0;
220
4.31M
    f[1] ^= t1;
221
4.31M
    f[2] ^= t2;
222
4.31M
    f[3] ^= t3;
223
4.31M
    f[4] ^= t4;
224
225
4.31M
    g[0] ^= t0;
226
4.31M
    g[1] ^= t1;
227
4.31M
    g[2] ^= t2;
228
4.31M
    g[3] ^= t3;
229
4.31M
    g[4] ^= t4;
230
4.31M
}
231
232
/* Subtract b from a into r. (r = a - b)
233
 *
234
 * r  A field element.
235
 * a  A field element.
236
 * b  A field element.
237
 */
238
void fe_sub(fe r, const fe a, const fe b)
239
4.73M
{
240
4.73M
    r[0] = a[0] - b[0];
241
4.73M
    r[1] = a[1] - b[1];
242
4.73M
    r[2] = a[2] - b[2];
243
4.73M
    r[3] = a[3] - b[3];
244
4.73M
    r[4] = a[4] - b[4];
245
4.73M
}
246
247
/* Add b to a into r. (r = a + b)
248
 *
249
 * r  A field element.
250
 * a  A field element.
251
 * b  A field element.
252
 */
253
void fe_add(fe r, const fe a, const fe b)
254
4.74M
{
255
4.74M
    r[0] = a[0] + b[0];
256
4.74M
    r[1] = a[1] + b[1];
257
4.74M
    r[2] = a[2] + b[2];
258
4.74M
    r[3] = a[3] + b[3];
259
4.74M
    r[4] = a[4] + b[4];
260
4.74M
}
261
262
/* Multiply a and b into r. (r = a * b)
263
 *
264
 * r  A field element.
265
 * a  A field element.
266
 * b  A field element.
267
 */
268
void fe_mul(fe r, const fe a, const fe b)
269
6.22M
{
270
6.22M
    const __int128_t k19 = 19;
271
6.22M
    __int128_t t0 = ((__int128_t)a[0]) * b[0];
272
6.22M
    __int128_t t1 = ((__int128_t)a[0]) * b[1]
273
6.22M
                  + ((__int128_t)a[1]) * b[0];
274
6.22M
    __int128_t t2 = ((__int128_t)a[0]) * b[2]
275
6.22M
                  + ((__int128_t)a[1]) * b[1]
276
6.22M
                  + ((__int128_t)a[2]) * b[0];
277
6.22M
    __int128_t t3 = ((__int128_t)a[0]) * b[3]
278
6.22M
                  + ((__int128_t)a[1]) * b[2]
279
6.22M
                  + ((__int128_t)a[2]) * b[1]
280
6.22M
                  + ((__int128_t)a[3]) * b[0];
281
6.22M
    __int128_t t4 = ((__int128_t)a[0]) * b[4]
282
6.22M
                  + ((__int128_t)a[1]) * b[3]
283
6.22M
                  + ((__int128_t)a[2]) * b[2]
284
6.22M
                  + ((__int128_t)a[3]) * b[1]
285
6.22M
                  + ((__int128_t)a[4]) * b[0];
286
6.22M
    __int128_t t5 = ((__int128_t)a[1]) * b[4]
287
6.22M
                  + ((__int128_t)a[2]) * b[3]
288
6.22M
                  + ((__int128_t)a[3]) * b[2]
289
6.22M
                  + ((__int128_t)a[4]) * b[1];
290
6.22M
    __int128_t t6 = ((__int128_t)a[2]) * b[4]
291
6.22M
                  + ((__int128_t)a[3]) * b[3]
292
6.22M
                  + ((__int128_t)a[4]) * b[2];
293
6.22M
    __int128_t t7 = ((__int128_t)a[3]) * b[4]
294
6.22M
                  + ((__int128_t)a[4]) * b[3];
295
6.22M
    __int128_t t8 = ((__int128_t)a[4]) * b[4];
296
297
    /* Modulo reduce double long word. */
298
6.22M
    t0 += t5 * k19;
299
6.22M
    t1 += t6 * k19;
300
6.22M
    t2 += t7 * k19;
301
6.22M
    t3 += t8 * k19;
302
303
    /* Normalize to 51-bits of data per word. */
304
6.22M
    t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
305
306
6.22M
    t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
307
6.22M
    t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
308
6.22M
    t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
309
6.22M
    t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
310
6.22M
    r[0] += (sword64)((t4 >> 51) * k19);
311
6.22M
    r[4] = t4 & 0x7ffffffffffff;
312
6.22M
}
313
314
/* Square a and put result in r. (r = a * a)
315
 *
316
 * r  A field element.
317
 * a  A field element.
318
 * r and a may be the same field element (a is fully read before r is written).
319
 */
320
void fe_sq(fe r, const fe a)
321
6.07M
{
322
6.07M
    const __int128_t k19 = 19;
323
6.07M
    const __int128_t k2 = 2;
324
6.07M
    __int128_t t0 = ((__int128_t)a[0]) * a[0];
325
6.07M
    __int128_t t1 = ((__int128_t)a[0]) * a[1] * k2;
326
6.07M
    __int128_t t2 = ((__int128_t)a[0]) * a[2] * k2
327
6.07M
                  + ((__int128_t)a[1]) * a[1];
328
6.07M
    __int128_t t3 = ((__int128_t)a[0]) * a[3] * k2
329
6.07M
                  + ((__int128_t)a[1]) * a[2] * k2;
330
6.07M
    __int128_t t4 = ((__int128_t)a[0]) * a[4] * k2
331
6.07M
                  + ((__int128_t)a[1]) * a[3] * k2
332
6.07M
                  + ((__int128_t)a[2]) * a[2];
333
6.07M
    __int128_t t5 = ((__int128_t)a[1]) * a[4] * k2
334
6.07M
                  + ((__int128_t)a[2]) * a[3] * k2;
335
6.07M
    __int128_t t6 = ((__int128_t)a[2]) * a[4] * k2
336
6.07M
                  + ((__int128_t)a[3]) * a[3];
337
6.07M
    __int128_t t7 = ((__int128_t)a[3]) * a[4] * k2;
338
6.07M
    __int128_t t8 = ((__int128_t)a[4]) * a[4];
339
340
    /* Modulo reduce double long word. */
341
6.07M
    t0 += t5 * k19;
342
6.07M
    t1 += t6 * k19;
343
6.07M
    t2 += t7 * k19;
344
6.07M
    t3 += t8 * k19;
345
346
    /* Normalize to 51-bits of data per word. */
347
6.07M
    t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
348
349
6.07M
    t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
350
6.07M
    t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
351
6.07M
    t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
352
6.07M
    t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
353
6.07M
    r[0] += (sword64)((t4 >> 51) * k19);
354
6.07M
    r[4] = t4 & 0x7ffffffffffff;
355
6.07M
}
356
357
/* Multiply a by 121666 and put result in r. (r = 121666 * a)
358
 *
359
 * r  A field element.
360
 * a  A field element.
361
 * r and a may be the same field element (a is fully read before r is written).
362
 */
363
void fe_mul121666(fe r, fe a)
364
1.06M
{
365
1.06M
    const __int128_t k19 = 19;
366
1.06M
    const __int128_t k121666 = 121666;
367
1.06M
    __int128_t t0 = ((__int128_t)a[0]) * k121666;
368
1.06M
    __int128_t t1 = ((__int128_t)a[1]) * k121666;
369
1.06M
    __int128_t t2 = ((__int128_t)a[2]) * k121666;
370
1.06M
    __int128_t t3 = ((__int128_t)a[3]) * k121666;
371
1.06M
    __int128_t t4 = ((__int128_t)a[4]) * k121666;
372
373
    /* Normalize to 51-bits of data per word. */
374
1.06M
    t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
375
376
1.06M
    t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
377
1.06M
    t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
378
1.06M
    t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
379
1.06M
    t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
380
1.06M
    r[0] += (sword64)((t4 >> 51) * k19);
381
1.06M
    r[4] = t4 & 0x7ffffffffffff;
382
1.06M
}
383
384
/* Find the inverse of a modulo 2^255 - 19 and put result in r.
385
 * (r * a) mod (2^255 - 19) = 1
386
 * Implementation is constant time.
387
 *
388
 * r  A field element.
389
 * a  A field element.
390
 */
391
void fe_invert(fe r, const fe a)
392
5.39k
{
393
5.39k
    fe  t0, t1, t2, t3;
394
5.39k
    int i;
395
396
    /* a ^ (2^255 - 21) */
397
5.39k
    fe_sq(t0,  a); for (i = 1; i <   1; ++i) fe_sq(t0, t0);
398
10.7k
    fe_sq(t1, t0); for (i = 1; i <   2; ++i) fe_sq(t1, t1); fe_mul(t1,  a, t1);
399
5.39k
    fe_mul(t0, t0, t1);
400
5.39k
    fe_sq(t2, t0); for (i = 1; i <   1; ++i) fe_sq(t2, t2); fe_mul(t1, t1, t2);
401
26.9k
    fe_sq(t2, t1); for (i = 1; i <   5; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
402
53.9k
    fe_sq(t2, t1); for (i = 1; i <  10; ++i) fe_sq(t2, t2); fe_mul(t2, t2, t1);
403
107k
    fe_sq(t3, t2); for (i = 1; i <  20; ++i) fe_sq(t3, t3); fe_mul(t2, t3, t2);
404
53.9k
    fe_sq(t2, t2); for (i = 1; i <  10; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
405
269k
    fe_sq(t2, t1); for (i = 1; i <  50; ++i) fe_sq(t2, t2); fe_mul(t2, t2, t1);
406
539k
    fe_sq(t3, t2); for (i = 1; i < 100; ++i) fe_sq(t3, t3); fe_mul(t2, t3, t2);
407
269k
    fe_sq(t2, t2); for (i = 1; i <  50; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
408
26.9k
    fe_sq(t1, t1); for (i = 1; i <   5; ++i) fe_sq(t1, t1); fe_mul( r, t1, t0);
409
5.39k
}
410
411
#ifndef CURVE25519_SMALL
412
#ifndef WOLFSSL_CURVE25519_BLINDING
413
/* Scalar multiply the field element a by n using Montgomery Ladder and places
414
 * result in r.
415
 *
416
 * r  A field element as an array of bytes.
417
 * n  The scalar as an array of bytes.
418
 * a  A field element as an array of bytes.
419
 */
420
int curve25519(byte* r, const byte* n, const byte* a)
421
{
422
    fe           x1, x2, z2, x3, z3;
423
    fe           t0, t1;
424
    int          pos;
425
    unsigned int swap;
426
    unsigned int b;
427
428
    fe_frombytes(x1, a);
429
    fe_1(x2);
430
    fe_0(z2);
431
    fe_copy(x3, x1);
432
    fe_1(z3);
433
434
    swap = 0;
435
    for (pos = 254;pos >= 0;--pos) {
436
        b = (unsigned int)(n[pos / 8] >> (pos & 7));
437
        b &= 1;
438
        swap ^= b;
439
        fe_cswap(x2, x3, (int)swap);
440
        fe_cswap(z2, z3, (int)swap);
441
        swap = b;
442
443
        fe_sub(t0, x3, z3);
444
        fe_sub(t1, x2, z2);
445
        fe_add(x2, x2, z2);
446
        fe_add(z2, x3, z3);
447
        fe_mul(z3, t0, x2);
448
        fe_mul(z2, z2, t1);
449
        fe_sq(t0, t1);
450
        fe_sq(t1, x2);
451
        fe_add(x3, z3, z2);
452
        fe_sub(z2, z3, z2);
453
        fe_mul(x2, t1, t0);
454
        fe_sub(t1, t1, t0);
455
        fe_sq(z2, z2);
456
        fe_mul121666(z3, t1);
457
        fe_sq(x3, x3);
458
        fe_add(t0, t0, z3);
459
        fe_mul(z3, x1, z2);
460
        fe_mul(z2, t1, t0);
461
    }
462
    fe_cswap(x2, x3, (int)swap);
463
    fe_cswap(z2, z3, (int)swap);
464
465
    fe_invert(z2, z2);
466
    fe_mul(x2, x2, z2);
467
    fe_tobytes(r, x2);
468
469
    return 0;
470
}
471
#else
472
int curve25519_blind(byte* r, const byte* n, const byte* mask, const byte* a,
473
    const byte* rz)
474
4.16k
{
475
4.16k
    fe           x1, x2, z2, x3, z3;
476
4.16k
    fe           t0, t1;
477
4.16k
    int          pos;
478
4.16k
    unsigned int b;
479
480
4.16k
    fe_frombytes(x1, a);
481
4.16k
    fe_1(x2);
482
4.16k
    fe_0(z2);
483
4.16k
    fe_copy(x3, x1);
484
4.16k
    fe_frombytes(z3, rz);
485
4.16k
    fe_mul(x3, x3, z3);
486
487
    /* mask_bits[255] */
488
4.16k
    b = (unsigned int)(mask[31] >> 7);
489
4.16k
    b &= 1;
490
4.16k
    fe_cswap(x2,x3,(int)b);
491
4.16k
    fe_cswap(z2,z3,(int)b);
492
1.06M
    for (pos = 255;pos >= 1;--pos) {
493
1.06M
        b = (unsigned int)(n[pos / 8] >> (pos & 7));
494
1.06M
        b &= 1;
495
1.06M
        fe_cswap(x2, x3, (int)b);
496
1.06M
        fe_cswap(z2, z3, (int)b);
497
498
        /* montgomery */
499
1.06M
        fe_sub(t0, x3, z3);
500
1.06M
        fe_sub(t1, x2, z2);
501
1.06M
        fe_add(x2, x2, z2);
502
1.06M
        fe_add(z2, x3, z3);
503
1.06M
        fe_mul(z3, t0, x2);
504
1.06M
        fe_mul(z2, z2, t1);
505
1.06M
        fe_sq(t0, t1);
506
1.06M
        fe_sq(t1, x2);
507
1.06M
        fe_add(x3, z3, z2);
508
1.06M
        fe_sub(z2, z3, z2);
509
1.06M
        fe_mul(x2, t1, t0);
510
1.06M
        fe_sub(t1, t1, t0);
511
1.06M
        fe_sq(z2, z2);
512
1.06M
        fe_mul121666(z3, t1);
513
1.06M
        fe_sq(x3, x3);
514
1.06M
        fe_add(t0, t0, z3);
515
1.06M
        fe_mul(z3, x1, z2);
516
1.06M
        fe_mul(z2, t1, t0);
517
518
1.06M
        b = (unsigned int)(mask[(pos - 1) / 8] >> ((pos - 1) & 7));
519
1.06M
        b &= 1;
520
1.06M
        fe_cswap(x2, x3, (int)b);
521
1.06M
        fe_cswap(z2, z3, (int)b);
522
1.06M
    }
523
4.16k
    b = (unsigned int)(n[0] & 1);
524
4.16k
    fe_cswap(x2, x3, (int)b);
525
4.16k
    fe_cswap(z2, z3, (int)b);
526
527
4.16k
    fe_invert(z2, z2);
528
4.16k
    fe_mul(x2, x2, z2);
529
4.16k
    fe_tobytes(r, x2);
530
531
4.16k
    return 0;
532
4.16k
}
533
#endif /* WOLFSSL_CURVE25519_BLINDING */
534
#endif /* !CURVE25519_SMALL */
535
536
/* The field element value 0 as an array of bytes. */
537
static const unsigned char zero[32] = {0};
538
539
/* Constant time check as to whether a is not 0.
540
 *
541
 * a  A field element.
542
 */
543
int fe_isnonzero(const fe a)
544
1.27k
{
545
1.27k
    unsigned char s[32];
546
1.27k
    fe_tobytes(s, a);
547
1.27k
    return ConstantCompare(s, zero, 32);
548
1.27k
}
549
550
/* Checks whether a is negative.
551
 *
552
 * a  A field element.
553
 */
554
int fe_isnegative(const fe a)
555
2.05k
{
556
2.05k
    unsigned char s[32];
557
2.05k
    fe_tobytes(s, a);
558
2.05k
    return s[0] & 1;
559
2.05k
}
560
561
/* Negates field element a and stores the result in r.
562
 *
563
 * r  A field element.
564
 * a  A field element.
565
 */
566
void fe_neg(fe r, const fe a)
567
58.6k
{
568
58.6k
    r[0] = -a[0];
569
58.6k
    r[1] = -a[1];
570
58.6k
    r[2] = -a[2];
571
58.6k
    r[3] = -a[3];
572
58.6k
    r[4] = -a[4];
573
58.6k
}
574
575
/* Constant time, conditional move of g into f.
576
 * f is not changed if the condition is 0.
577
 *
578
 * f  A field element.
579
 * g  A field element.
580
 * b  If 1 then copy and if 0 then don't copy.
581
 */
582
void fe_cmov(fe f, const fe g, int b)
583
1.45M
{
584
1.45M
    sword64 m = b;
585
1.45M
    sword64 t0, t1, t2, t3, t4;
586
587
    /* Convert conditional into mask. */
588
1.45M
    m = -m;
589
1.45M
    t0 = m & (f[0] ^ g[0]);
590
1.45M
    t1 = m & (f[1] ^ g[1]);
591
1.45M
    t2 = m & (f[2] ^ g[2]);
592
1.45M
    t3 = m & (f[3] ^ g[3]);
593
1.45M
    t4 = m & (f[4] ^ g[4]);
594
595
1.45M
    f[0] ^= t0;
596
1.45M
    f[1] ^= t1;
597
1.45M
    f[2] ^= t2;
598
1.45M
    f[3] ^= t3;
599
1.45M
    f[4] ^= t4;
600
1.45M
}
601
602
void fe_pow22523(fe r, const fe a)
603
867
{
604
867
    fe t0, t1, t2;
605
867
    int i;
606
607
    /* a ^ (2^252 - 3) */
608
867
    fe_sq(t0,  a); for (i = 1; i <   1; ++i) fe_sq(t0, t0);
609
1.73k
    fe_sq(t1, t0); for (i = 1; i <   2; ++i) fe_sq(t1, t1); fe_mul(t1,  a, t1);
610
867
    fe_mul(t0, t0, t1);
611
867
    fe_sq(t0, t0); for (i = 1; i <   1; ++i) fe_sq(t0, t0); fe_mul(t0, t1, t0);
612
4.33k
    fe_sq(t1, t0); for (i = 1; i <   5; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0);
613
8.67k
    fe_sq(t1, t0); for (i = 1; i <  10; ++i) fe_sq(t1, t1); fe_mul(t1, t1, t0);
614
17.3k
    fe_sq(t2, t1); for (i = 1; i <  20; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
615
8.67k
    fe_sq(t1, t1); for (i = 1; i <  10; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0);
616
43.3k
    fe_sq(t1, t0); for (i = 1; i <  50; ++i) fe_sq(t1, t1); fe_mul(t1, t1, t0);
617
86.7k
    fe_sq(t2, t1); for (i = 1; i < 100; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
618
43.3k
    fe_sq(t1, t1); for (i = 1; i <  50; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0);
619
1.73k
    fe_sq(t0, t0); for (i = 1; i <   2; ++i) fe_sq(t0, t0); fe_mul( r, t0, a);
620
621
867
    return;
622
867
}
623
624
/* Double the square of a and put result in r. (r = 2 * a * a)
625
 *
626
 * r  A field element.
627
 * a  A field element.
628
 * r and a may be the same field element (a is fully read before r is written).
629
 */
630
void fe_sq2(fe r, const fe a)
631
80.9k
{
632
80.9k
    const __int128_t k2 = 2;
633
80.9k
    const __int128_t k19 = 19;
634
80.9k
    __int128_t t0 = k2 * (((__int128_t)a[0]) * a[0]);
635
80.9k
    __int128_t t1 = k2 * (((__int128_t)a[0]) * a[1] * k2);
636
80.9k
    __int128_t t2 = k2 * (((__int128_t)a[0]) * a[2] * k2
637
80.9k
                  + ((__int128_t)a[1]) * a[1]);
638
80.9k
    __int128_t t3 = k2 * (((__int128_t)a[0]) * a[3] * k2
639
80.9k
                  + ((__int128_t)a[1]) * a[2] * k2);
640
80.9k
    __int128_t t4 = k2 * (((__int128_t)a[0]) * a[4] * k2
641
80.9k
                  + ((__int128_t)a[1]) * a[3] * k2
642
80.9k
                  + ((__int128_t)a[2]) * a[2]);
643
80.9k
    __int128_t t5 = k2 * (((__int128_t)a[1]) * a[4] * k2
644
80.9k
                  + ((__int128_t)a[2]) * a[3] * k2);
645
80.9k
    __int128_t t6 = k2 * (((__int128_t)a[2]) * a[4] * k2
646
80.9k
                  + ((__int128_t)a[3]) * a[3]);
647
80.9k
    __int128_t t7 = k2 * (((__int128_t)a[3]) * a[4] * k2);
648
80.9k
    __int128_t t8 = k2 * (((__int128_t)a[4]) * a[4]);
649
650
    /* Modulo reduce double long word. */
651
80.9k
    t0 += t5 * k19;
652
80.9k
    t1 += t6 * k19;
653
80.9k
    t2 += t7 * k19;
654
80.9k
    t3 += t8 * k19;
655
656
    /* Normalize to 51-bits of data per word. */
657
80.9k
    t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
658
659
80.9k
    t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
660
80.9k
    t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
661
80.9k
    t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
662
80.9k
    t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
663
80.9k
    r[0] += (sword64)((t4 >> 51) * k19);
664
80.9k
    r[4] = t4 & 0x7ffffffffffff;
665
80.9k
}
666
667
/* Load 3 little endian bytes into a 64-bit word.
668
 *
669
 * in  An array of bytes.
670
 * returns a 64-bit word.
671
 */
672
sword64 load_3(const unsigned char *in)
673
0
{
674
0
    word64 result;
675
676
0
    result = ((((word64)in[0])      ) |
677
0
              (((word64)in[1]) <<  8) |
678
0
              (((word64)in[2]) << 16));
679
680
0
    return (sword64)result;
681
0
}
682
683
/* Load 4 little endian bytes into a 64-bit word.
684
 *
685
 * in  An array of bytes.
686
 * returns a 64-bit word.
687
 */
688
sword64 load_4(const unsigned char *in)
689
0
{
690
0
    word64 result;
691
692
0
    result = ((((word64)in[0])      ) |
693
0
              (((word64)in[1]) <<  8) |
694
0
              (((word64)in[2]) << 16) |
695
0
              (((word64)in[3]) << 24));
696
697
0
    return (sword64)result;
698
0
}
699