Coverage Report

Created: 2026-04-01 07:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/botan/src/lib/pubkey/ed25519/ed25519_fe.cpp
Line
Count
Source
1
/*
2
* Ed25519 field element
3
* (C) 2017 Ribose Inc
4
*
5
* Based on the public domain code from SUPERCOP ref10 by
6
* Peter Schwabe, Daniel J. Bernstein, Niels Duif, Tanja Lange, Bo-Yin Yang
7
*
8
* Botan is released under the Simplified BSD License (see license.txt)
9
*/
10
11
#include <botan/internal/ed25519_fe.h>
12
13
#include <botan/internal/ed25519_internal.h>
14
15
namespace Botan {
16
17
//static
18
0
FE_25519 FE_25519::invert(const FE_25519& z) {
19
0
   FE_25519 t0;
20
0
   FE_25519 t1;
21
0
   FE_25519 t2;
22
0
   FE_25519 t3;
23
24
0
   fe_sq(t0, z);
25
0
   fe_sq_iter(t1, t0, 2);
26
0
   fe_mul(t1, z, t1);
27
0
   fe_mul(t0, t0, t1);
28
0
   fe_sq(t2, t0);
29
0
   fe_mul(t1, t1, t2);
30
0
   fe_sq_iter(t2, t1, 5);
31
0
   fe_mul(t1, t2, t1);
32
0
   fe_sq_iter(t2, t1, 10);
33
0
   fe_mul(t2, t2, t1);
34
0
   fe_sq_iter(t3, t2, 20);
35
0
   fe_mul(t2, t3, t2);
36
0
   fe_sq_iter(t2, t2, 10);
37
0
   fe_mul(t1, t2, t1);
38
0
   fe_sq_iter(t2, t1, 50);
39
0
   fe_mul(t2, t2, t1);
40
0
   fe_sq_iter(t3, t2, 100);
41
0
   fe_mul(t2, t3, t2);
42
0
   fe_sq_iter(t2, t2, 50);
43
0
   fe_mul(t1, t2, t1);
44
0
   fe_sq_iter(t1, t1, 5);
45
46
0
   fe_mul(t0, t1, t0);
47
0
   return t0;
48
0
}
49
50
0
FE_25519 FE_25519::pow_22523(const FE_25519& z) {
51
0
   FE_25519 t0;
52
0
   FE_25519 t1;
53
0
   FE_25519 t2;
54
55
0
   fe_sq(t0, z);
56
0
   fe_sq_iter(t1, t0, 2);
57
0
   fe_mul(t1, z, t1);
58
0
   fe_mul(t0, t0, t1);
59
0
   fe_sq(t0, t0);
60
0
   fe_mul(t0, t1, t0);
61
0
   fe_sq_iter(t1, t0, 5);
62
0
   fe_mul(t0, t1, t0);
63
0
   fe_sq_iter(t1, t0, 10);
64
0
   fe_mul(t1, t1, t0);
65
0
   fe_sq_iter(t2, t1, 20);
66
0
   fe_mul(t1, t2, t1);
67
0
   fe_sq_iter(t1, t1, 10);
68
0
   fe_mul(t0, t1, t0);
69
0
   fe_sq_iter(t1, t0, 50);
70
0
   fe_mul(t1, t1, t0);
71
0
   fe_sq_iter(t2, t1, 100);
72
0
   fe_mul(t1, t2, t1);
73
0
   fe_sq_iter(t1, t1, 50);
74
0
   fe_mul(t0, t1, t0);
75
0
   fe_sq_iter(t0, t0, 2);
76
77
0
   fe_mul(t0, t0, z);
78
0
   return t0;
79
0
}
80
81
/*
82
h = f * g
83
Can overlap h with f or g.
84
85
Preconditions:
86
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
87
|g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
88
89
Postconditions:
90
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
91
*/
92
93
/*
94
Notes on implementation strategy:
95
96
Using schoolbook multiplication.
97
Karatsuba would save a little in some cost models.
98
99
Most multiplications by 2 and 19 are 32-bit precomputations;
100
cheaper than 64-bit postcomputations.
101
102
There is one remaining multiplication by 19 in the carry chain;
103
one *19 precomputation can be merged into this,
104
but the resulting data flow is considerably less clean.
105
106
There are 12 carries below.
107
10 of them are 2-way parallelizable and vectorizable.
108
Can get away with 11 carries, but then data flow is much deeper.
109
110
With tighter constraints on inputs can squeeze carries into int32.
111
*/
112
113
//static
114
0
FE_25519 FE_25519::mul(const FE_25519& f, const FE_25519& g) {
115
0
   const int32_t f0 = f[0];
116
0
   const int32_t f1 = f[1];
117
0
   const int32_t f2 = f[2];
118
0
   const int32_t f3 = f[3];
119
0
   const int32_t f4 = f[4];
120
0
   const int32_t f5 = f[5];
121
0
   const int32_t f6 = f[6];
122
0
   const int32_t f7 = f[7];
123
0
   const int32_t f8 = f[8];
124
0
   const int32_t f9 = f[9];
125
126
0
   const int32_t g0 = g[0];
127
0
   const int32_t g1 = g[1];
128
0
   const int32_t g2 = g[2];
129
0
   const int32_t g3 = g[3];
130
0
   const int32_t g4 = g[4];
131
0
   const int32_t g5 = g[5];
132
0
   const int32_t g6 = g[6];
133
0
   const int32_t g7 = g[7];
134
0
   const int32_t g8 = g[8];
135
0
   const int32_t g9 = g[9];
136
137
0
   const int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
138
0
   const int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
139
0
   const int32_t g3_19 = 19 * g3;
140
0
   const int32_t g4_19 = 19 * g4;
141
0
   const int32_t g5_19 = 19 * g5;
142
0
   const int32_t g6_19 = 19 * g6;
143
0
   const int32_t g7_19 = 19 * g7;
144
0
   const int32_t g8_19 = 19 * g8;
145
0
   const int32_t g9_19 = 19 * g9;
146
0
   const int32_t f1_2 = 2 * f1;
147
0
   const int32_t f3_2 = 2 * f3;
148
0
   const int32_t f5_2 = 2 * f5;
149
0
   const int32_t f7_2 = 2 * f7;
150
0
   const int32_t f9_2 = 2 * f9;
151
152
0
   const int64_t f0g0 = f0 * static_cast<int64_t>(g0);
153
0
   const int64_t f0g1 = f0 * static_cast<int64_t>(g1);
154
0
   const int64_t f0g2 = f0 * static_cast<int64_t>(g2);
155
0
   const int64_t f0g3 = f0 * static_cast<int64_t>(g3);
156
0
   const int64_t f0g4 = f0 * static_cast<int64_t>(g4);
157
0
   const int64_t f0g5 = f0 * static_cast<int64_t>(g5);
158
0
   const int64_t f0g6 = f0 * static_cast<int64_t>(g6);
159
0
   const int64_t f0g7 = f0 * static_cast<int64_t>(g7);
160
0
   const int64_t f0g8 = f0 * static_cast<int64_t>(g8);
161
0
   const int64_t f0g9 = f0 * static_cast<int64_t>(g9);
162
0
   const int64_t f1g0 = f1 * static_cast<int64_t>(g0);
163
0
   const int64_t f1g1_2 = f1_2 * static_cast<int64_t>(g1);
164
0
   const int64_t f1g2 = f1 * static_cast<int64_t>(g2);
165
0
   const int64_t f1g3_2 = f1_2 * static_cast<int64_t>(g3);
166
0
   const int64_t f1g4 = f1 * static_cast<int64_t>(g4);
167
0
   const int64_t f1g5_2 = f1_2 * static_cast<int64_t>(g5);
168
0
   const int64_t f1g6 = f1 * static_cast<int64_t>(g6);
169
0
   const int64_t f1g7_2 = f1_2 * static_cast<int64_t>(g7);
170
0
   const int64_t f1g8 = f1 * static_cast<int64_t>(g8);
171
0
   const int64_t f1g9_38 = f1_2 * static_cast<int64_t>(g9_19);
172
0
   const int64_t f2g0 = f2 * static_cast<int64_t>(g0);
173
0
   const int64_t f2g1 = f2 * static_cast<int64_t>(g1);
174
0
   const int64_t f2g2 = f2 * static_cast<int64_t>(g2);
175
0
   const int64_t f2g3 = f2 * static_cast<int64_t>(g3);
176
0
   const int64_t f2g4 = f2 * static_cast<int64_t>(g4);
177
0
   const int64_t f2g5 = f2 * static_cast<int64_t>(g5);
178
0
   const int64_t f2g6 = f2 * static_cast<int64_t>(g6);
179
0
   const int64_t f2g7 = f2 * static_cast<int64_t>(g7);
180
0
   const int64_t f2g8_19 = f2 * static_cast<int64_t>(g8_19);
181
0
   const int64_t f2g9_19 = f2 * static_cast<int64_t>(g9_19);
182
0
   const int64_t f3g0 = f3 * static_cast<int64_t>(g0);
183
0
   const int64_t f3g1_2 = f3_2 * static_cast<int64_t>(g1);
184
0
   const int64_t f3g2 = f3 * static_cast<int64_t>(g2);
185
0
   const int64_t f3g3_2 = f3_2 * static_cast<int64_t>(g3);
186
0
   const int64_t f3g4 = f3 * static_cast<int64_t>(g4);
187
0
   const int64_t f3g5_2 = f3_2 * static_cast<int64_t>(g5);
188
0
   const int64_t f3g6 = f3 * static_cast<int64_t>(g6);
189
0
   const int64_t f3g7_38 = f3_2 * static_cast<int64_t>(g7_19);
190
0
   const int64_t f3g8_19 = f3 * static_cast<int64_t>(g8_19);
191
0
   const int64_t f3g9_38 = f3_2 * static_cast<int64_t>(g9_19);
192
0
   const int64_t f4g0 = f4 * static_cast<int64_t>(g0);
193
0
   const int64_t f4g1 = f4 * static_cast<int64_t>(g1);
194
0
   const int64_t f4g2 = f4 * static_cast<int64_t>(g2);
195
0
   const int64_t f4g3 = f4 * static_cast<int64_t>(g3);
196
0
   const int64_t f4g4 = f4 * static_cast<int64_t>(g4);
197
0
   const int64_t f4g5 = f4 * static_cast<int64_t>(g5);
198
0
   const int64_t f4g6_19 = f4 * static_cast<int64_t>(g6_19);
199
0
   const int64_t f4g7_19 = f4 * static_cast<int64_t>(g7_19);
200
0
   const int64_t f4g8_19 = f4 * static_cast<int64_t>(g8_19);
201
0
   const int64_t f4g9_19 = f4 * static_cast<int64_t>(g9_19);
202
0
   const int64_t f5g0 = f5 * static_cast<int64_t>(g0);
203
0
   const int64_t f5g1_2 = f5_2 * static_cast<int64_t>(g1);
204
0
   const int64_t f5g2 = f5 * static_cast<int64_t>(g2);
205
0
   const int64_t f5g3_2 = f5_2 * static_cast<int64_t>(g3);
206
0
   const int64_t f5g4 = f5 * static_cast<int64_t>(g4);
207
0
   const int64_t f5g5_38 = f5_2 * static_cast<int64_t>(g5_19);
208
0
   const int64_t f5g6_19 = f5 * static_cast<int64_t>(g6_19);
209
0
   const int64_t f5g7_38 = f5_2 * static_cast<int64_t>(g7_19);
210
0
   const int64_t f5g8_19 = f5 * static_cast<int64_t>(g8_19);
211
0
   const int64_t f5g9_38 = f5_2 * static_cast<int64_t>(g9_19);
212
0
   const int64_t f6g0 = f6 * static_cast<int64_t>(g0);
213
0
   const int64_t f6g1 = f6 * static_cast<int64_t>(g1);
214
0
   const int64_t f6g2 = f6 * static_cast<int64_t>(g2);
215
0
   const int64_t f6g3 = f6 * static_cast<int64_t>(g3);
216
0
   const int64_t f6g4_19 = f6 * static_cast<int64_t>(g4_19);
217
0
   const int64_t f6g5_19 = f6 * static_cast<int64_t>(g5_19);
218
0
   const int64_t f6g6_19 = f6 * static_cast<int64_t>(g6_19);
219
0
   const int64_t f6g7_19 = f6 * static_cast<int64_t>(g7_19);
220
0
   const int64_t f6g8_19 = f6 * static_cast<int64_t>(g8_19);
221
0
   const int64_t f6g9_19 = f6 * static_cast<int64_t>(g9_19);
222
0
   const int64_t f7g0 = f7 * static_cast<int64_t>(g0);
223
0
   const int64_t f7g1_2 = f7_2 * static_cast<int64_t>(g1);
224
0
   const int64_t f7g2 = f7 * static_cast<int64_t>(g2);
225
0
   const int64_t f7g3_38 = f7_2 * static_cast<int64_t>(g3_19);
226
0
   const int64_t f7g4_19 = f7 * static_cast<int64_t>(g4_19);
227
0
   const int64_t f7g5_38 = f7_2 * static_cast<int64_t>(g5_19);
228
0
   const int64_t f7g6_19 = f7 * static_cast<int64_t>(g6_19);
229
0
   const int64_t f7g7_38 = f7_2 * static_cast<int64_t>(g7_19);
230
0
   const int64_t f7g8_19 = f7 * static_cast<int64_t>(g8_19);
231
0
   const int64_t f7g9_38 = f7_2 * static_cast<int64_t>(g9_19);
232
0
   const int64_t f8g0 = f8 * static_cast<int64_t>(g0);
233
0
   const int64_t f8g1 = f8 * static_cast<int64_t>(g1);
234
0
   const int64_t f8g2_19 = f8 * static_cast<int64_t>(g2_19);
235
0
   const int64_t f8g3_19 = f8 * static_cast<int64_t>(g3_19);
236
0
   const int64_t f8g4_19 = f8 * static_cast<int64_t>(g4_19);
237
0
   const int64_t f8g5_19 = f8 * static_cast<int64_t>(g5_19);
238
0
   const int64_t f8g6_19 = f8 * static_cast<int64_t>(g6_19);
239
0
   const int64_t f8g7_19 = f8 * static_cast<int64_t>(g7_19);
240
0
   const int64_t f8g8_19 = f8 * static_cast<int64_t>(g8_19);
241
0
   const int64_t f8g9_19 = f8 * static_cast<int64_t>(g9_19);
242
0
   const int64_t f9g0 = f9 * static_cast<int64_t>(g0);
243
0
   const int64_t f9g1_38 = f9_2 * static_cast<int64_t>(g1_19);
244
0
   const int64_t f9g2_19 = f9 * static_cast<int64_t>(g2_19);
245
0
   const int64_t f9g3_38 = f9_2 * static_cast<int64_t>(g3_19);
246
0
   const int64_t f9g4_19 = f9 * static_cast<int64_t>(g4_19);
247
0
   const int64_t f9g5_38 = f9_2 * static_cast<int64_t>(g5_19);
248
0
   const int64_t f9g6_19 = f9 * static_cast<int64_t>(g6_19);
249
0
   const int64_t f9g7_38 = f9_2 * static_cast<int64_t>(g7_19);
250
0
   const int64_t f9g8_19 = f9 * static_cast<int64_t>(g8_19);
251
0
   const int64_t f9g9_38 = f9_2 * static_cast<int64_t>(g9_19);
252
253
0
   int64_t h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38;
254
0
   int64_t h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19;
255
0
   int64_t h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38;
256
0
   int64_t h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19;
257
0
   int64_t h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38;
258
0
   int64_t h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19;
259
0
   int64_t h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38;
260
0
   int64_t h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19;
261
0
   int64_t h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38;
262
0
   int64_t h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0;
263
264
   /*
265
   |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
266
   i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
267
   |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
268
   i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
269
   */
270
0
   carry<26>(h0, h1);
271
0
   carry<26>(h4, h5);
272
273
   /* |h0| <= 2^25 */
274
   /* |h4| <= 2^25 */
275
   /* |h1| <= 1.71*2^59 */
276
   /* |h5| <= 1.71*2^59 */
277
278
0
   carry<25>(h1, h2);
279
0
   carry<25>(h5, h6);
280
281
   /* |h1| <= 2^24; from now on fits into int32 */
282
   /* |h5| <= 2^24; from now on fits into int32 */
283
   /* |h2| <= 1.41*2^60 */
284
   /* |h6| <= 1.41*2^60 */
285
286
0
   carry<26>(h2, h3);
287
0
   carry<26>(h6, h7);
288
   /* |h2| <= 2^25; from now on fits into int32 unchanged */
289
   /* |h6| <= 2^25; from now on fits into int32 unchanged */
290
   /* |h3| <= 1.71*2^59 */
291
   /* |h7| <= 1.71*2^59 */
292
293
0
   carry<25>(h3, h4);
294
0
   carry<25>(h7, h8);
295
   /* |h3| <= 2^24; from now on fits into int32 unchanged */
296
   /* |h7| <= 2^24; from now on fits into int32 unchanged */
297
   /* |h4| <= 1.72*2^34 */
298
   /* |h8| <= 1.41*2^60 */
299
300
0
   carry<26>(h4, h5);
301
0
   carry<26>(h8, h9);
302
   /* |h4| <= 2^25; from now on fits into int32 unchanged */
303
   /* |h8| <= 2^25; from now on fits into int32 unchanged */
304
   /* |h5| <= 1.01*2^24 */
305
   /* |h9| <= 1.71*2^59 */
306
307
0
   carry<25, 19>(h9, h0);
308
309
   /* |h9| <= 2^24; from now on fits into int32 unchanged */
310
   /* |h0| <= 1.1*2^39 */
311
312
0
   carry<26>(h0, h1);
313
   /* |h0| <= 2^25; from now on fits into int32 unchanged */
314
   /* |h1| <= 1.01*2^24 */
315
316
0
   return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
317
0
}
318
319
/*
320
h = f * f
321
Can overlap h with f.
322
323
Preconditions:
324
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
325
326
Postconditions:
327
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
328
*/
329
330
/*
331
See fe_mul.c for discussion of implementation strategy.
332
*/
333
334
//static
335
0
FE_25519 FE_25519::sqr_iter(const FE_25519& f, size_t iter) {
336
0
   int32_t f0 = f[0];
337
0
   int32_t f1 = f[1];
338
0
   int32_t f2 = f[2];
339
0
   int32_t f3 = f[3];
340
0
   int32_t f4 = f[4];
341
0
   int32_t f5 = f[5];
342
0
   int32_t f6 = f[6];
343
0
   int32_t f7 = f[7];
344
0
   int32_t f8 = f[8];
345
0
   int32_t f9 = f[9];
346
347
0
   for(size_t i = 0; i != iter; ++i) {
348
0
      const int32_t f0_2 = 2 * f0;
349
0
      const int32_t f1_2 = 2 * f1;
350
0
      const int32_t f2_2 = 2 * f2;
351
0
      const int32_t f3_2 = 2 * f3;
352
0
      const int32_t f4_2 = 2 * f4;
353
0
      const int32_t f5_2 = 2 * f5;
354
0
      const int32_t f6_2 = 2 * f6;
355
0
      const int32_t f7_2 = 2 * f7;
356
0
      const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
357
0
      const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
358
0
      const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
359
0
      const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
360
0
      const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
361
362
0
      const int64_t f0f0 = f0 * static_cast<int64_t>(f0);
363
0
      const int64_t f0f1_2 = f0_2 * static_cast<int64_t>(f1);
364
0
      const int64_t f0f2_2 = f0_2 * static_cast<int64_t>(f2);
365
0
      const int64_t f0f3_2 = f0_2 * static_cast<int64_t>(f3);
366
0
      const int64_t f0f4_2 = f0_2 * static_cast<int64_t>(f4);
367
0
      const int64_t f0f5_2 = f0_2 * static_cast<int64_t>(f5);
368
0
      const int64_t f0f6_2 = f0_2 * static_cast<int64_t>(f6);
369
0
      const int64_t f0f7_2 = f0_2 * static_cast<int64_t>(f7);
370
0
      const int64_t f0f8_2 = f0_2 * static_cast<int64_t>(f8);
371
0
      const int64_t f0f9_2 = f0_2 * static_cast<int64_t>(f9);
372
0
      const int64_t f1f1_2 = f1_2 * static_cast<int64_t>(f1);
373
0
      const int64_t f1f2_2 = f1_2 * static_cast<int64_t>(f2);
374
0
      const int64_t f1f3_4 = f1_2 * static_cast<int64_t>(f3_2);
375
0
      const int64_t f1f4_2 = f1_2 * static_cast<int64_t>(f4);
376
0
      const int64_t f1f5_4 = f1_2 * static_cast<int64_t>(f5_2);
377
0
      const int64_t f1f6_2 = f1_2 * static_cast<int64_t>(f6);
378
0
      const int64_t f1f7_4 = f1_2 * static_cast<int64_t>(f7_2);
379
0
      const int64_t f1f8_2 = f1_2 * static_cast<int64_t>(f8);
380
0
      const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
381
0
      const int64_t f2f2 = f2 * static_cast<int64_t>(f2);
382
0
      const int64_t f2f3_2 = f2_2 * static_cast<int64_t>(f3);
383
0
      const int64_t f2f4_2 = f2_2 * static_cast<int64_t>(f4);
384
0
      const int64_t f2f5_2 = f2_2 * static_cast<int64_t>(f5);
385
0
      const int64_t f2f6_2 = f2_2 * static_cast<int64_t>(f6);
386
0
      const int64_t f2f7_2 = f2_2 * static_cast<int64_t>(f7);
387
0
      const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
388
0
      const int64_t f2f9_38 = f2 * static_cast<int64_t>(f9_38);
389
0
      const int64_t f3f3_2 = f3_2 * static_cast<int64_t>(f3);
390
0
      const int64_t f3f4_2 = f3_2 * static_cast<int64_t>(f4);
391
0
      const int64_t f3f5_4 = f3_2 * static_cast<int64_t>(f5_2);
392
0
      const int64_t f3f6_2 = f3_2 * static_cast<int64_t>(f6);
393
0
      const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
394
0
      const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
395
0
      const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
396
0
      const int64_t f4f4 = f4 * static_cast<int64_t>(f4);
397
0
      const int64_t f4f5_2 = f4_2 * static_cast<int64_t>(f5);
398
0
      const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
399
0
      const int64_t f4f7_38 = f4 * static_cast<int64_t>(f7_38);
400
0
      const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
401
0
      const int64_t f4f9_38 = f4 * static_cast<int64_t>(f9_38);
402
0
      const int64_t f5f5_38 = f5 * static_cast<int64_t>(f5_38);
403
0
      const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
404
0
      const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
405
0
      const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
406
0
      const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
407
0
      const int64_t f6f6_19 = f6 * static_cast<int64_t>(f6_19);
408
0
      const int64_t f6f7_38 = f6 * static_cast<int64_t>(f7_38);
409
0
      const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
410
0
      const int64_t f6f9_38 = f6 * static_cast<int64_t>(f9_38);
411
0
      const int64_t f7f7_38 = f7 * static_cast<int64_t>(f7_38);
412
0
      const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
413
0
      const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
414
0
      const int64_t f8f8_19 = f8 * static_cast<int64_t>(f8_19);
415
0
      const int64_t f8f9_38 = f8 * static_cast<int64_t>(f9_38);
416
0
      const int64_t f9f9_38 = f9 * static_cast<int64_t>(f9_38);
417
418
0
      int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
419
0
      int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
420
0
      int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
421
0
      int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
422
0
      int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
423
0
      int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
424
0
      int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
425
0
      int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
426
0
      int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
427
0
      int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
428
429
0
      carry<26>(h0, h1);
430
0
      carry<26>(h4, h5);
431
0
      carry<25>(h1, h2);
432
0
      carry<25>(h5, h6);
433
0
      carry<26>(h2, h3);
434
0
      carry<26>(h6, h7);
435
436
0
      carry<25>(h3, h4);
437
0
      carry<25>(h7, h8);
438
439
0
      carry<26>(h4, h5);
440
0
      carry<26>(h8, h9);
441
0
      carry<25, 19>(h9, h0);
442
0
      carry<26>(h0, h1);
443
444
0
      f0 = static_cast<int32_t>(h0);
445
0
      f1 = static_cast<int32_t>(h1);
446
0
      f2 = static_cast<int32_t>(h2);
447
0
      f3 = static_cast<int32_t>(h3);
448
0
      f4 = static_cast<int32_t>(h4);
449
0
      f5 = static_cast<int32_t>(h5);
450
0
      f6 = static_cast<int32_t>(h6);
451
0
      f7 = static_cast<int32_t>(h7);
452
0
      f8 = static_cast<int32_t>(h8);
453
0
      f9 = static_cast<int32_t>(h9);
454
0
   }
455
456
0
   return FE_25519(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9);
457
0
}
458
459
/*
460
h = 2 * f * f
461
Can overlap h with f.
462
463
Preconditions:
464
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
465
466
Postconditions:
467
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
468
*/
469
470
/*
471
See fe_mul.c for discussion of implementation strategy.
472
*/
473
474
//static
475
0
FE_25519 FE_25519::sqr2(const FE_25519& f) {
476
0
   const int32_t f0 = f[0];
477
0
   const int32_t f1 = f[1];
478
0
   const int32_t f2 = f[2];
479
0
   const int32_t f3 = f[3];
480
0
   const int32_t f4 = f[4];
481
0
   const int32_t f5 = f[5];
482
0
   const int32_t f6 = f[6];
483
0
   const int32_t f7 = f[7];
484
0
   const int32_t f8 = f[8];
485
0
   const int32_t f9 = f[9];
486
0
   const int32_t f0_2 = 2 * f0;
487
0
   const int32_t f1_2 = 2 * f1;
488
0
   const int32_t f2_2 = 2 * f2;
489
0
   const int32_t f3_2 = 2 * f3;
490
0
   const int32_t f4_2 = 2 * f4;
491
0
   const int32_t f5_2 = 2 * f5;
492
0
   const int32_t f6_2 = 2 * f6;
493
0
   const int32_t f7_2 = 2 * f7;
494
0
   const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
495
0
   const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
496
0
   const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
497
0
   const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
498
0
   const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
499
0
   const int64_t f0f0 = f0 * static_cast<int64_t>(f0);
500
0
   const int64_t f0f1_2 = f0_2 * static_cast<int64_t>(f1);
501
0
   const int64_t f0f2_2 = f0_2 * static_cast<int64_t>(f2);
502
0
   const int64_t f0f3_2 = f0_2 * static_cast<int64_t>(f3);
503
0
   const int64_t f0f4_2 = f0_2 * static_cast<int64_t>(f4);
504
0
   const int64_t f0f5_2 = f0_2 * static_cast<int64_t>(f5);
505
0
   const int64_t f0f6_2 = f0_2 * static_cast<int64_t>(f6);
506
0
   const int64_t f0f7_2 = f0_2 * static_cast<int64_t>(f7);
507
0
   const int64_t f0f8_2 = f0_2 * static_cast<int64_t>(f8);
508
0
   const int64_t f0f9_2 = f0_2 * static_cast<int64_t>(f9);
509
0
   const int64_t f1f1_2 = f1_2 * static_cast<int64_t>(f1);
510
0
   const int64_t f1f2_2 = f1_2 * static_cast<int64_t>(f2);
511
0
   const int64_t f1f3_4 = f1_2 * static_cast<int64_t>(f3_2);
512
0
   const int64_t f1f4_2 = f1_2 * static_cast<int64_t>(f4);
513
0
   const int64_t f1f5_4 = f1_2 * static_cast<int64_t>(f5_2);
514
0
   const int64_t f1f6_2 = f1_2 * static_cast<int64_t>(f6);
515
0
   const int64_t f1f7_4 = f1_2 * static_cast<int64_t>(f7_2);
516
0
   const int64_t f1f8_2 = f1_2 * static_cast<int64_t>(f8);
517
0
   const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
518
0
   const int64_t f2f2 = f2 * static_cast<int64_t>(f2);
519
0
   const int64_t f2f3_2 = f2_2 * static_cast<int64_t>(f3);
520
0
   const int64_t f2f4_2 = f2_2 * static_cast<int64_t>(f4);
521
0
   const int64_t f2f5_2 = f2_2 * static_cast<int64_t>(f5);
522
0
   const int64_t f2f6_2 = f2_2 * static_cast<int64_t>(f6);
523
0
   const int64_t f2f7_2 = f2_2 * static_cast<int64_t>(f7);
524
0
   const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
525
0
   const int64_t f2f9_38 = f2 * static_cast<int64_t>(f9_38);
526
0
   const int64_t f3f3_2 = f3_2 * static_cast<int64_t>(f3);
527
0
   const int64_t f3f4_2 = f3_2 * static_cast<int64_t>(f4);
528
0
   const int64_t f3f5_4 = f3_2 * static_cast<int64_t>(f5_2);
529
0
   const int64_t f3f6_2 = f3_2 * static_cast<int64_t>(f6);
530
0
   const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
531
0
   const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
532
0
   const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
533
0
   const int64_t f4f4 = f4 * static_cast<int64_t>(f4);
534
0
   const int64_t f4f5_2 = f4_2 * static_cast<int64_t>(f5);
535
0
   const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
536
0
   const int64_t f4f7_38 = f4 * static_cast<int64_t>(f7_38);
537
0
   const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
538
0
   const int64_t f4f9_38 = f4 * static_cast<int64_t>(f9_38);
539
0
   const int64_t f5f5_38 = f5 * static_cast<int64_t>(f5_38);
540
0
   const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
541
0
   const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
542
0
   const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
543
0
   const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
544
0
   const int64_t f6f6_19 = f6 * static_cast<int64_t>(f6_19);
545
0
   const int64_t f6f7_38 = f6 * static_cast<int64_t>(f7_38);
546
0
   const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
547
0
   const int64_t f6f9_38 = f6 * static_cast<int64_t>(f9_38);
548
0
   const int64_t f7f7_38 = f7 * static_cast<int64_t>(f7_38);
549
0
   const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
550
0
   const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
551
0
   const int64_t f8f8_19 = f8 * static_cast<int64_t>(f8_19);
552
0
   const int64_t f8f9_38 = f8 * static_cast<int64_t>(f9_38);
553
0
   const int64_t f9f9_38 = f9 * static_cast<int64_t>(f9_38);
554
555
0
   int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
556
0
   int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
557
0
   int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
558
0
   int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
559
0
   int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
560
0
   int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
561
0
   int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
562
0
   int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
563
0
   int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
564
0
   int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
565
566
0
   h0 += h0;
567
0
   h1 += h1;
568
0
   h2 += h2;
569
0
   h3 += h3;
570
0
   h4 += h4;
571
0
   h5 += h5;
572
0
   h6 += h6;
573
0
   h7 += h7;
574
0
   h8 += h8;
575
0
   h9 += h9;
576
577
0
   carry<26>(h0, h1);
578
0
   carry<26>(h4, h5);
579
580
0
   carry<25>(h1, h2);
581
0
   carry<25>(h5, h6);
582
583
0
   carry<26>(h2, h3);
584
0
   carry<26>(h6, h7);
585
586
0
   carry<25>(h3, h4);
587
0
   carry<25>(h7, h8);
588
0
   carry<26>(h4, h5);
589
0
   carry<26>(h8, h9);
590
0
   carry<25, 19>(h9, h0);
591
0
   carry<26>(h0, h1);
592
593
0
   return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
594
0
}
595
596
/*
597
Ignores top bit of h.
598
*/
599
600
0
void FE_25519::from_bytes(const uint8_t s[32]) {
601
0
   int64_t h0 = load_4(s);
602
0
   int64_t h1 = load_3(s + 4) << 6;
603
0
   int64_t h2 = load_3(s + 7) << 5;
604
0
   int64_t h3 = load_3(s + 10) << 3;
605
0
   int64_t h4 = load_3(s + 13) << 2;
606
0
   int64_t h5 = load_4(s + 16);
607
0
   int64_t h6 = load_3(s + 20) << 7;
608
0
   int64_t h7 = load_3(s + 23) << 5;
609
0
   int64_t h8 = load_3(s + 26) << 4;
610
0
   int64_t h9 = (load_3(s + 29) & 0x7fffff) << 2;
611
612
0
   carry<25, 19>(h9, h0);
613
0
   carry<25>(h1, h2);
614
0
   carry<25>(h3, h4);
615
0
   carry<25>(h5, h6);
616
0
   carry<25>(h7, h8);
617
618
0
   carry<26>(h0, h1);
619
0
   carry<26>(h2, h3);
620
0
   carry<26>(h4, h5);
621
0
   carry<26>(h6, h7);
622
0
   carry<26>(h8, h9);
623
624
0
   m_fe[0] = static_cast<int32_t>(h0);
625
0
   m_fe[1] = static_cast<int32_t>(h1);
626
0
   m_fe[2] = static_cast<int32_t>(h2);
627
0
   m_fe[3] = static_cast<int32_t>(h3);
628
0
   m_fe[4] = static_cast<int32_t>(h4);
629
0
   m_fe[5] = static_cast<int32_t>(h5);
630
0
   m_fe[6] = static_cast<int32_t>(h6);
631
0
   m_fe[7] = static_cast<int32_t>(h7);
632
0
   m_fe[8] = static_cast<int32_t>(h8);
633
0
   m_fe[9] = static_cast<int32_t>(h9);
634
0
}
635
636
/*
637
Preconditions:
638
|h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
639
640
Write p=2^255-19; q=floor(h/p).
641
Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
642
643
Proof:
644
Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
645
Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
646
647
Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
648
Then 0<y<1.
649
650
Write r=h-pq.
651
Have 0<=r<=p-1=2^255-20.
652
Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
653
654
Write x=r+19(2^-255)r+y.
655
Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
656
657
Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
658
so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
659
*/
660
661
0
void FE_25519::to_bytes(uint8_t s[32]) const {
662
0
   const int64_t X25 = (1 << 25);
663
664
0
   int32_t h0 = m_fe[0];
665
0
   int32_t h1 = m_fe[1];
666
0
   int32_t h2 = m_fe[2];
667
0
   int32_t h3 = m_fe[3];
668
0
   int32_t h4 = m_fe[4];
669
0
   int32_t h5 = m_fe[5];
670
0
   int32_t h6 = m_fe[6];
671
0
   int32_t h7 = m_fe[7];
672
0
   int32_t h8 = m_fe[8];
673
0
   int32_t h9 = m_fe[9];
674
0
   int32_t q;
675
676
0
   q = (19 * h9 + ((static_cast<int32_t>(1) << 24))) >> 25;
677
0
   q = (h0 + q) >> 26;
678
0
   q = (h1 + q) >> 25;
679
0
   q = (h2 + q) >> 26;
680
0
   q = (h3 + q) >> 25;
681
0
   q = (h4 + q) >> 26;
682
0
   q = (h5 + q) >> 25;
683
0
   q = (h6 + q) >> 26;
684
0
   q = (h7 + q) >> 25;
685
0
   q = (h8 + q) >> 26;
686
0
   q = (h9 + q) >> 25;
687
688
   /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
689
0
   h0 += 19 * q;
690
   /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
691
692
0
   carry0<26>(h0, h1);
693
0
   carry0<25>(h1, h2);
694
0
   carry0<26>(h2, h3);
695
0
   carry0<25>(h3, h4);
696
0
   carry0<26>(h4, h5);
697
0
   carry0<25>(h5, h6);
698
0
   carry0<26>(h6, h7);
699
0
   carry0<25>(h7, h8);
700
0
   carry0<26>(h8, h9);
701
702
0
   int32_t carry9 = h9 >> 25;
703
0
   h9 -= carry9 * X25;
704
   /* h10 = carry9 */
705
706
   /*
707
   Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
708
   Have h0+...+2^230 h9 between 0 and 2^255-1;
709
   evidently 2^255 h10-2^255 q = 0.
710
   Goal: Output h0+...+2^230 h9.
711
   */
712
713
0
   s[0] = static_cast<uint8_t>(h0 >> 0);
714
0
   s[1] = static_cast<uint8_t>(h0 >> 8);
715
0
   s[2] = static_cast<uint8_t>(h0 >> 16);
716
0
   s[3] = static_cast<uint8_t>((h0 >> 24) | (h1 << 2));
717
0
   s[4] = static_cast<uint8_t>(h1 >> 6);
718
0
   s[5] = static_cast<uint8_t>(h1 >> 14);
719
0
   s[6] = static_cast<uint8_t>((h1 >> 22) | (h2 << 3));
720
0
   s[7] = static_cast<uint8_t>(h2 >> 5);
721
0
   s[8] = static_cast<uint8_t>(h2 >> 13);
722
0
   s[9] = static_cast<uint8_t>((h2 >> 21) | (h3 << 5));
723
0
   s[10] = static_cast<uint8_t>(h3 >> 3);
724
0
   s[11] = static_cast<uint8_t>(h3 >> 11);
725
0
   s[12] = static_cast<uint8_t>((h3 >> 19) | (h4 << 6));
726
0
   s[13] = static_cast<uint8_t>(h4 >> 2);
727
0
   s[14] = static_cast<uint8_t>(h4 >> 10);
728
0
   s[15] = static_cast<uint8_t>(h4 >> 18);
729
0
   s[16] = static_cast<uint8_t>(h5 >> 0);
730
0
   s[17] = static_cast<uint8_t>(h5 >> 8);
731
0
   s[18] = static_cast<uint8_t>(h5 >> 16);
732
0
   s[19] = static_cast<uint8_t>((h5 >> 24) | (h6 << 1));
733
0
   s[20] = static_cast<uint8_t>(h6 >> 7);
734
0
   s[21] = static_cast<uint8_t>(h6 >> 15);
735
0
   s[22] = static_cast<uint8_t>((h6 >> 23) | (h7 << 3));
736
0
   s[23] = static_cast<uint8_t>(h7 >> 5);
737
0
   s[24] = static_cast<uint8_t>(h7 >> 13);
738
0
   s[25] = static_cast<uint8_t>((h7 >> 21) | (h8 << 4));
739
0
   s[26] = static_cast<uint8_t>(h8 >> 4);
740
0
   s[27] = static_cast<uint8_t>(h8 >> 12);
741
0
   s[28] = static_cast<uint8_t>((h8 >> 20) | (h9 << 6));
742
0
   s[29] = static_cast<uint8_t>(h9 >> 2);
743
0
   s[30] = static_cast<uint8_t>(h9 >> 10);
744
0
   s[31] = static_cast<uint8_t>(h9 >> 18);
745
0
}
746
747
}  // namespace Botan