Coverage Report

Created: 2023-02-13 06:21

/src/botan/src/lib/pubkey/ed25519/ed25519_fe.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Ed25519 field element
3
* (C) 2017 Ribose Inc
4
*
5
* Based on the public domain code from SUPERCOP ref10 by
6
* Peter Schwabe, Daniel J. Bernstein, Niels Duif, Tanja Lange, Bo-Yin Yang
7
*
8
* Botan is released under the Simplified BSD License (see license.txt)
9
*/
10
11
#include <botan/internal/ed25519_fe.h>
12
#include <botan/internal/ed25519_internal.h>
13
14
namespace Botan {
15
16
//static
17
FE_25519 FE_25519::invert(const FE_25519& z)
18
2
   {
19
2
   FE_25519 t0;
20
2
   FE_25519 t1;
21
2
   FE_25519 t2;
22
2
   FE_25519 t3;
23
24
2
   fe_sq(t0, z);
25
2
   fe_sq_iter(t1, t0, 2);
26
2
   fe_mul(t1, z, t1);
27
2
   fe_mul(t0, t0, t1);
28
2
   fe_sq(t2, t0);
29
2
   fe_mul(t1, t1, t2);
30
2
   fe_sq_iter(t2, t1, 5);
31
2
   fe_mul(t1, t2, t1);
32
2
   fe_sq_iter(t2, t1, 10);
33
2
   fe_mul(t2, t2, t1);
34
2
   fe_sq_iter(t3, t2, 20);
35
2
   fe_mul(t2, t3, t2);
36
2
   fe_sq_iter(t2, t2, 10);
37
2
   fe_mul(t1, t2, t1);
38
2
   fe_sq_iter(t2, t1, 50);
39
2
   fe_mul(t2, t2, t1);
40
2
   fe_sq_iter(t3, t2, 100);
41
2
   fe_mul(t2, t3, t2);
42
2
   fe_sq_iter(t2, t2, 50);
43
2
   fe_mul(t1, t2, t1);
44
2
   fe_sq_iter(t1, t1, 5);
45
46
2
   fe_mul(t0, t1, t0);
47
2
   return t0;
48
2
   }
49
50
FE_25519 FE_25519::pow_22523(const fe& z)
51
0
   {
52
0
   FE_25519 t0;
53
0
   FE_25519 t1;
54
0
   FE_25519 t2;
55
56
0
   fe_sq(t0, z);
57
0
   fe_sq_iter(t1, t0, 2);
58
0
   fe_mul(t1, z, t1);
59
0
   fe_mul(t0, t0, t1);
60
0
   fe_sq(t0, t0);
61
0
   fe_mul(t0, t1, t0);
62
0
   fe_sq_iter(t1, t0, 5);
63
0
   fe_mul(t0, t1, t0);
64
0
   fe_sq_iter(t1, t0, 10);
65
0
   fe_mul(t1, t1, t0);
66
0
   fe_sq_iter(t2, t1, 20);
67
0
   fe_mul(t1, t2, t1);
68
0
   fe_sq_iter(t1, t1, 10);
69
0
   fe_mul(t0, t1, t0);
70
0
   fe_sq_iter(t1, t0, 50);
71
0
   fe_mul(t1, t1, t0);
72
0
   fe_sq_iter(t2, t1, 100);
73
0
   fe_mul(t1, t2, t1);
74
0
   fe_sq_iter(t1, t1, 50);
75
0
   fe_mul(t0, t1, t0);
76
0
   fe_sq_iter(t0, t0, 2);
77
78
0
   fe_mul(t0, t0, z);
79
0
   return t0;
80
0
   }
81
82
/*
83
h = f * g
84
Can overlap h with f or g.
85
86
Preconditions:
87
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
88
|g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
89
90
Postconditions:
91
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
92
*/
93
94
/*
95
Notes on implementation strategy:
96
97
Using schoolbook multiplication.
98
Karatsuba would save a little in some cost models.
99
100
Most multiplications by 2 and 19 are 32-bit precomputations;
101
cheaper than 64-bit postcomputations.
102
103
There is one remaining multiplication by 19 in the carry chain;
104
one *19 precomputation can be merged into this,
105
but the resulting data flow is considerably less clean.
106
107
There are 12 carries below.
108
10 of them are 2-way parallelizable and vectorizable.
109
Can get away with 11 carries, but then data flow is much deeper.
110
111
With tighter constraints on inputs can squeeze carries into int32.
112
*/
113
114
//static
115
FE_25519 FE_25519::mul(const FE_25519& f, const FE_25519& g)
116
948
   {
117
948
   const int32_t f0 = f[0];
118
948
   const int32_t f1 = f[1];
119
948
   const int32_t f2 = f[2];
120
948
   const int32_t f3 = f[3];
121
948
   const int32_t f4 = f[4];
122
948
   const int32_t f5 = f[5];
123
948
   const int32_t f6 = f[6];
124
948
   const int32_t f7 = f[7];
125
948
   const int32_t f8 = f[8];
126
948
   const int32_t f9 = f[9];
127
128
948
   const int32_t g0 = g[0];
129
948
   const int32_t g1 = g[1];
130
948
   const int32_t g2 = g[2];
131
948
   const int32_t g3 = g[3];
132
948
   const int32_t g4 = g[4];
133
948
   const int32_t g5 = g[5];
134
948
   const int32_t g6 = g[6];
135
948
   const int32_t g7 = g[7];
136
948
   const int32_t g8 = g[8];
137
948
   const int32_t g9 = g[9];
138
139
948
   const int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
140
948
   const int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
141
948
   const int32_t g3_19 = 19 * g3;
142
948
   const int32_t g4_19 = 19 * g4;
143
948
   const int32_t g5_19 = 19 * g5;
144
948
   const int32_t g6_19 = 19 * g6;
145
948
   const int32_t g7_19 = 19 * g7;
146
948
   const int32_t g8_19 = 19 * g8;
147
948
   const int32_t g9_19 = 19 * g9;
148
948
   const int32_t f1_2 = 2 * f1;
149
948
   const int32_t f3_2 = 2 * f3;
150
948
   const int32_t f5_2 = 2 * f5;
151
948
   const int32_t f7_2 = 2 * f7;
152
948
   const int32_t f9_2 = 2 * f9;
153
154
948
   const int64_t f0g0    = f0   * static_cast<int64_t>(g0);
155
948
   const int64_t f0g1    = f0   * static_cast<int64_t>(g1);
156
948
   const int64_t f0g2    = f0   * static_cast<int64_t>(g2);
157
948
   const int64_t f0g3    = f0   * static_cast<int64_t>(g3);
158
948
   const int64_t f0g4    = f0   * static_cast<int64_t>(g4);
159
948
   const int64_t f0g5    = f0   * static_cast<int64_t>(g5);
160
948
   const int64_t f0g6    = f0   * static_cast<int64_t>(g6);
161
948
   const int64_t f0g7    = f0   * static_cast<int64_t>(g7);
162
948
   const int64_t f0g8    = f0   * static_cast<int64_t>(g8);
163
948
   const int64_t f0g9    = f0   * static_cast<int64_t>(g9);
164
948
   const int64_t f1g0    = f1   * static_cast<int64_t>(g0);
165
948
   const int64_t f1g1_2  = f1_2 * static_cast<int64_t>(g1);
166
948
   const int64_t f1g2    = f1   * static_cast<int64_t>(g2);
167
948
   const int64_t f1g3_2  = f1_2 * static_cast<int64_t>(g3);
168
948
   const int64_t f1g4    = f1   * static_cast<int64_t>(g4);
169
948
   const int64_t f1g5_2  = f1_2 * static_cast<int64_t>(g5);
170
948
   const int64_t f1g6    = f1   * static_cast<int64_t>(g6);
171
948
   const int64_t f1g7_2  = f1_2 * static_cast<int64_t>(g7);
172
948
   const int64_t f1g8    = f1   * static_cast<int64_t>(g8);
173
948
   const int64_t f1g9_38 = f1_2 * static_cast<int64_t>(g9_19);
174
948
   const int64_t f2g0    = f2   * static_cast<int64_t>(g0);
175
948
   const int64_t f2g1    = f2   * static_cast<int64_t>(g1);
176
948
   const int64_t f2g2    = f2   * static_cast<int64_t>(g2);
177
948
   const int64_t f2g3    = f2   * static_cast<int64_t>(g3);
178
948
   const int64_t f2g4    = f2   * static_cast<int64_t>(g4);
179
948
   const int64_t f2g5    = f2   * static_cast<int64_t>(g5);
180
948
   const int64_t f2g6    = f2   * static_cast<int64_t>(g6);
181
948
   const int64_t f2g7    = f2   * static_cast<int64_t>(g7);
182
948
   const int64_t f2g8_19 = f2   * static_cast<int64_t>(g8_19);
183
948
   const int64_t f2g9_19 = f2   * static_cast<int64_t>(g9_19);
184
948
   const int64_t f3g0    = f3   * static_cast<int64_t>(g0);
185
948
   const int64_t f3g1_2  = f3_2 * static_cast<int64_t>(g1);
186
948
   const int64_t f3g2    = f3   * static_cast<int64_t>(g2);
187
948
   const int64_t f3g3_2  = f3_2 * static_cast<int64_t>(g3);
188
948
   const int64_t f3g4    = f3   * static_cast<int64_t>(g4);
189
948
   const int64_t f3g5_2  = f3_2 * static_cast<int64_t>(g5);
190
948
   const int64_t f3g6    = f3   * static_cast<int64_t>(g6);
191
948
   const int64_t f3g7_38 = f3_2 * static_cast<int64_t>(g7_19);
192
948
   const int64_t f3g8_19 = f3   * static_cast<int64_t>(g8_19);
193
948
   const int64_t f3g9_38 = f3_2 * static_cast<int64_t>(g9_19);
194
948
   const int64_t f4g0    = f4   * static_cast<int64_t>(g0);
195
948
   const int64_t f4g1    = f4   * static_cast<int64_t>(g1);
196
948
   const int64_t f4g2    = f4   * static_cast<int64_t>(g2);
197
948
   const int64_t f4g3    = f4   * static_cast<int64_t>(g3);
198
948
   const int64_t f4g4    = f4   * static_cast<int64_t>(g4);
199
948
   const int64_t f4g5    = f4   * static_cast<int64_t>(g5);
200
948
   const int64_t f4g6_19 = f4   * static_cast<int64_t>(g6_19);
201
948
   const int64_t f4g7_19 = f4   * static_cast<int64_t>(g7_19);
202
948
   const int64_t f4g8_19 = f4   * static_cast<int64_t>(g8_19);
203
948
   const int64_t f4g9_19 = f4   * static_cast<int64_t>(g9_19);
204
948
   const int64_t f5g0    = f5   * static_cast<int64_t>(g0);
205
948
   const int64_t f5g1_2  = f5_2 * static_cast<int64_t>(g1);
206
948
   const int64_t f5g2    = f5   * static_cast<int64_t>(g2);
207
948
   const int64_t f5g3_2  = f5_2 * static_cast<int64_t>(g3);
208
948
   const int64_t f5g4    = f5   * static_cast<int64_t>(g4);
209
948
   const int64_t f5g5_38 = f5_2 * static_cast<int64_t>(g5_19);
210
948
   const int64_t f5g6_19 = f5   * static_cast<int64_t>(g6_19);
211
948
   const int64_t f5g7_38 = f5_2 * static_cast<int64_t>(g7_19);
212
948
   const int64_t f5g8_19 = f5   * static_cast<int64_t>(g8_19);
213
948
   const int64_t f5g9_38 = f5_2 * static_cast<int64_t>(g9_19);
214
948
   const int64_t f6g0    = f6   * static_cast<int64_t>(g0);
215
948
   const int64_t f6g1    = f6   * static_cast<int64_t>(g1);
216
948
   const int64_t f6g2    = f6   * static_cast<int64_t>(g2);
217
948
   const int64_t f6g3    = f6   * static_cast<int64_t>(g3);
218
948
   const int64_t f6g4_19 = f6   * static_cast<int64_t>(g4_19);
219
948
   const int64_t f6g5_19 = f6   * static_cast<int64_t>(g5_19);
220
948
   const int64_t f6g6_19 = f6   * static_cast<int64_t>(g6_19);
221
948
   const int64_t f6g7_19 = f6   * static_cast<int64_t>(g7_19);
222
948
   const int64_t f6g8_19 = f6   * static_cast<int64_t>(g8_19);
223
948
   const int64_t f6g9_19 = f6   * static_cast<int64_t>(g9_19);
224
948
   const int64_t f7g0    = f7   * static_cast<int64_t>(g0);
225
948
   const int64_t f7g1_2  = f7_2 * static_cast<int64_t>(g1);
226
948
   const int64_t f7g2    = f7   * static_cast<int64_t>(g2);
227
948
   const int64_t f7g3_38 = f7_2 * static_cast<int64_t>(g3_19);
228
948
   const int64_t f7g4_19 = f7   * static_cast<int64_t>(g4_19);
229
948
   const int64_t f7g5_38 = f7_2 * static_cast<int64_t>(g5_19);
230
948
   const int64_t f7g6_19 = f7   * static_cast<int64_t>(g6_19);
231
948
   const int64_t f7g7_38 = f7_2 * static_cast<int64_t>(g7_19);
232
948
   const int64_t f7g8_19 = f7   * static_cast<int64_t>(g8_19);
233
948
   const int64_t f7g9_38 = f7_2 * static_cast<int64_t>(g9_19);
234
948
   const int64_t f8g0    = f8   * static_cast<int64_t>(g0);
235
948
   const int64_t f8g1    = f8   * static_cast<int64_t>(g1);
236
948
   const int64_t f8g2_19 = f8   * static_cast<int64_t>(g2_19);
237
948
   const int64_t f8g3_19 = f8   * static_cast<int64_t>(g3_19);
238
948
   const int64_t f8g4_19 = f8   * static_cast<int64_t>(g4_19);
239
948
   const int64_t f8g5_19 = f8   * static_cast<int64_t>(g5_19);
240
948
   const int64_t f8g6_19 = f8   * static_cast<int64_t>(g6_19);
241
948
   const int64_t f8g7_19 = f8   * static_cast<int64_t>(g7_19);
242
948
   const int64_t f8g8_19 = f8   * static_cast<int64_t>(g8_19);
243
948
   const int64_t f8g9_19 = f8   * static_cast<int64_t>(g9_19);
244
948
   const int64_t f9g0    = f9   * static_cast<int64_t>(g0);
245
948
   const int64_t f9g1_38 = f9_2 * static_cast<int64_t>(g1_19);
246
948
   const int64_t f9g2_19 = f9   * static_cast<int64_t>(g2_19);
247
948
   const int64_t f9g3_38 = f9_2 * static_cast<int64_t>(g3_19);
248
948
   const int64_t f9g4_19 = f9   * static_cast<int64_t>(g4_19);
249
948
   const int64_t f9g5_38 = f9_2 * static_cast<int64_t>(g5_19);
250
948
   const int64_t f9g6_19 = f9   * static_cast<int64_t>(g6_19);
251
948
   const int64_t f9g7_38 = f9_2 * static_cast<int64_t>(g7_19);
252
948
   const int64_t f9g8_19 = f9   * static_cast<int64_t>(g8_19);
253
948
   const int64_t f9g9_38 = f9_2 * static_cast<int64_t>(g9_19);
254
255
948
   int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
256
948
   int64_t h1 = f0g1+f1g0   +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
257
948
   int64_t h2 = f0g2+f1g1_2 +f2g0   +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
258
948
   int64_t h3 = f0g3+f1g2   +f2g1   +f3g0   +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
259
948
   int64_t h4 = f0g4+f1g3_2 +f2g2   +f3g1_2 +f4g0   +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
260
948
   int64_t h5 = f0g5+f1g4   +f2g3   +f3g2   +f4g1   +f5g0   +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
261
948
   int64_t h6 = f0g6+f1g5_2 +f2g4   +f3g3_2 +f4g2   +f5g1_2 +f6g0   +f7g9_38+f8g8_19+f9g7_38;
262
948
   int64_t h7 = f0g7+f1g6   +f2g5   +f3g4   +f4g3   +f5g2   +f6g1   +f7g0   +f8g9_19+f9g8_19;
263
948
   int64_t h8 = f0g8+f1g7_2 +f2g6   +f3g5_2 +f4g4   +f5g3_2 +f6g2   +f7g1_2 +f8g0   +f9g9_38;
264
948
   int64_t h9 = f0g9+f1g8   +f2g7   +f3g6   +f4g5   +f5g4   +f6g3   +f7g2   +f8g1   +f9g0   ;
265
266
   /*
267
   |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
268
   i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
269
   |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
270
   i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
271
   */
272
948
   carry<26>(h0, h1);
273
948
   carry<26>(h4, h5);
274
275
   /* |h0| <= 2^25 */
276
   /* |h4| <= 2^25 */
277
   /* |h1| <= 1.71*2^59 */
278
   /* |h5| <= 1.71*2^59 */
279
280
948
   carry<25>(h1, h2);
281
948
   carry<25>(h5, h6);
282
283
   /* |h1| <= 2^24; from now on fits into int32 */
284
   /* |h5| <= 2^24; from now on fits into int32 */
285
   /* |h2| <= 1.41*2^60 */
286
   /* |h6| <= 1.41*2^60 */
287
288
948
   carry<26>(h2, h3);
289
948
   carry<26>(h6, h7);
290
   /* |h2| <= 2^25; from now on fits into int32 unchanged */
291
   /* |h6| <= 2^25; from now on fits into int32 unchanged */
292
   /* |h3| <= 1.71*2^59 */
293
   /* |h7| <= 1.71*2^59 */
294
295
948
   carry<25>(h3, h4);
296
948
   carry<25>(h7, h8);
297
   /* |h3| <= 2^24; from now on fits into int32 unchanged */
298
   /* |h7| <= 2^24; from now on fits into int32 unchanged */
299
   /* |h4| <= 1.72*2^34 */
300
   /* |h8| <= 1.41*2^60 */
301
302
948
   carry<26>(h4, h5);
303
948
   carry<26>(h8, h9);
304
   /* |h4| <= 2^25; from now on fits into int32 unchanged */
305
   /* |h8| <= 2^25; from now on fits into int32 unchanged */
306
   /* |h5| <= 1.01*2^24 */
307
   /* |h9| <= 1.71*2^59 */
308
309
948
   carry<25, 19>(h9, h0);
310
311
   /* |h9| <= 2^24; from now on fits into int32 unchanged */
312
   /* |h0| <= 1.1*2^39 */
313
314
948
   carry<26>(h0, h1);
315
   /* |h0| <= 2^25; from now on fits into int32 unchanged */
316
   /* |h1| <= 1.01*2^24 */
317
318
948
   return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
319
948
   }
320
321
/*
322
h = f * f
323
Can overlap h with f.
324
325
Preconditions:
326
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
327
328
Postconditions:
329
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
330
*/
331
332
/*
333
See fe_mul.c for discussion of implementation strategy.
334
*/
335
336
//static
337
FE_25519 FE_25519::sqr_iter(const FE_25519& f, size_t iter)
338
46
   {
339
46
   int32_t f0 = f[0];
340
46
   int32_t f1 = f[1];
341
46
   int32_t f2 = f[2];
342
46
   int32_t f3 = f[3];
343
46
   int32_t f4 = f[4];
344
46
   int32_t f5 = f[5];
345
46
   int32_t f6 = f[6];
346
46
   int32_t f7 = f[7];
347
46
   int32_t f8 = f[8];
348
46
   int32_t f9 = f[9];
349
350
578
   for(size_t i = 0; i != iter; ++i)
351
532
      {
352
532
      const int32_t f0_2 = 2 * f0;
353
532
      const int32_t f1_2 = 2 * f1;
354
532
      const int32_t f2_2 = 2 * f2;
355
532
      const int32_t f3_2 = 2 * f3;
356
532
      const int32_t f4_2 = 2 * f4;
357
532
      const int32_t f5_2 = 2 * f5;
358
532
      const int32_t f6_2 = 2 * f6;
359
532
      const int32_t f7_2 = 2 * f7;
360
532
      const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
361
532
      const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
362
532
      const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
363
532
      const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
364
532
      const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
365
366
532
      const int64_t f0f0    = f0   * static_cast<int64_t>(f0);
367
532
      const int64_t f0f1_2  = f0_2 * static_cast<int64_t>(f1);
368
532
      const int64_t f0f2_2  = f0_2 * static_cast<int64_t>(f2);
369
532
      const int64_t f0f3_2  = f0_2 * static_cast<int64_t>(f3);
370
532
      const int64_t f0f4_2  = f0_2 * static_cast<int64_t>(f4);
371
532
      const int64_t f0f5_2  = f0_2 * static_cast<int64_t>(f5);
372
532
      const int64_t f0f6_2  = f0_2 * static_cast<int64_t>(f6);
373
532
      const int64_t f0f7_2  = f0_2 * static_cast<int64_t>(f7);
374
532
      const int64_t f0f8_2  = f0_2 * static_cast<int64_t>(f8);
375
532
      const int64_t f0f9_2  = f0_2 * static_cast<int64_t>(f9);
376
532
      const int64_t f1f1_2  = f1_2 * static_cast<int64_t>(f1);
377
532
      const int64_t f1f2_2  = f1_2 * static_cast<int64_t>(f2);
378
532
      const int64_t f1f3_4  = f1_2 * static_cast<int64_t>(f3_2);
379
532
      const int64_t f1f4_2  = f1_2 * static_cast<int64_t>(f4);
380
532
      const int64_t f1f5_4  = f1_2 * static_cast<int64_t>(f5_2);
381
532
      const int64_t f1f6_2  = f1_2 * static_cast<int64_t>(f6);
382
532
      const int64_t f1f7_4  = f1_2 * static_cast<int64_t>(f7_2);
383
532
      const int64_t f1f8_2  = f1_2 * static_cast<int64_t>(f8);
384
532
      const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
385
532
      const int64_t f2f2    = f2   * static_cast<int64_t>(f2);
386
532
      const int64_t f2f3_2  = f2_2 * static_cast<int64_t>(f3);
387
532
      const int64_t f2f4_2  = f2_2 * static_cast<int64_t>(f4);
388
532
      const int64_t f2f5_2  = f2_2 * static_cast<int64_t>(f5);
389
532
      const int64_t f2f6_2  = f2_2 * static_cast<int64_t>(f6);
390
532
      const int64_t f2f7_2  = f2_2 * static_cast<int64_t>(f7);
391
532
      const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
392
532
      const int64_t f2f9_38 = f2   * static_cast<int64_t>(f9_38);
393
532
      const int64_t f3f3_2  = f3_2 * static_cast<int64_t>(f3);
394
532
      const int64_t f3f4_2  = f3_2 * static_cast<int64_t>(f4);
395
532
      const int64_t f3f5_4  = f3_2 * static_cast<int64_t>(f5_2);
396
532
      const int64_t f3f6_2  = f3_2 * static_cast<int64_t>(f6);
397
532
      const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
398
532
      const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
399
532
      const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
400
532
      const int64_t f4f4    = f4   * static_cast<int64_t>(f4);
401
532
      const int64_t f4f5_2  = f4_2 * static_cast<int64_t>(f5);
402
532
      const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
403
532
      const int64_t f4f7_38 = f4   * static_cast<int64_t>(f7_38);
404
532
      const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
405
532
      const int64_t f4f9_38 = f4   * static_cast<int64_t>(f9_38);
406
532
      const int64_t f5f5_38 = f5   * static_cast<int64_t>(f5_38);
407
532
      const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
408
532
      const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
409
532
      const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
410
532
      const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
411
532
      const int64_t f6f6_19 = f6   * static_cast<int64_t>(f6_19);
412
532
      const int64_t f6f7_38 = f6   * static_cast<int64_t>(f7_38);
413
532
      const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
414
532
      const int64_t f6f9_38 = f6   * static_cast<int64_t>(f9_38);
415
532
      const int64_t f7f7_38 = f7   * static_cast<int64_t>(f7_38);
416
532
      const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
417
532
      const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
418
532
      const int64_t f8f8_19 = f8   * static_cast<int64_t>(f8_19);
419
532
      const int64_t f8f9_38 = f8   * static_cast<int64_t>(f9_38);
420
532
      const int64_t f9f9_38 = f9   * static_cast<int64_t>(f9_38);
421
422
532
      int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
423
532
      int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
424
532
      int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
425
532
      int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
426
532
      int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
427
532
      int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
428
532
      int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
429
532
      int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
430
532
      int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
431
532
      int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
432
433
532
      carry<26>(h0, h1);
434
532
      carry<26>(h4, h5);
435
532
      carry<25>(h1, h2);
436
532
      carry<25>(h5, h6);
437
532
      carry<26>(h2, h3);
438
532
      carry<26>(h6, h7);
439
440
532
      carry<25>(h3, h4);
441
532
      carry<25>(h7, h8);
442
443
532
      carry<26>(h4, h5);
444
532
      carry<26>(h8, h9);
445
532
      carry<25,19>(h9, h0);
446
532
      carry<26>(h0, h1);
447
448
532
      f0 = static_cast<int32_t>(h0);
449
532
      f1 = static_cast<int32_t>(h1);
450
532
      f2 = static_cast<int32_t>(h2);
451
532
      f3 = static_cast<int32_t>(h3);
452
532
      f4 = static_cast<int32_t>(h4);
453
532
      f5 = static_cast<int32_t>(h5);
454
532
      f6 = static_cast<int32_t>(h6);
455
532
      f7 = static_cast<int32_t>(h7);
456
532
      f8 = static_cast<int32_t>(h8);
457
532
      f9 = static_cast<int32_t>(h9);
458
532
      }
459
460
46
   return FE_25519(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9);
461
46
   }
462
463
/*
464
h = 2 * f * f
465
Can overlap h with f.
466
467
Preconditions:
468
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
469
470
Postconditions:
471
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
472
*/
473
474
/*
475
See fe_mul.c for discussion of implementation strategy.
476
*/
477
478
//static
479
FE_25519 FE_25519::sqr2(const FE_25519& f)
480
8
   {
481
8
   const int32_t f0 = f[0];
482
8
   const int32_t f1 = f[1];
483
8
   const int32_t f2 = f[2];
484
8
   const int32_t f3 = f[3];
485
8
   const int32_t f4 = f[4];
486
8
   const int32_t f5 = f[5];
487
8
   const int32_t f6 = f[6];
488
8
   const int32_t f7 = f[7];
489
8
   const int32_t f8 = f[8];
490
8
   const int32_t f9 = f[9];
491
8
   const int32_t f0_2 = 2 * f0;
492
8
   const int32_t f1_2 = 2 * f1;
493
8
   const int32_t f2_2 = 2 * f2;
494
8
   const int32_t f3_2 = 2 * f3;
495
8
   const int32_t f4_2 = 2 * f4;
496
8
   const int32_t f5_2 = 2 * f5;
497
8
   const int32_t f6_2 = 2 * f6;
498
8
   const int32_t f7_2 = 2 * f7;
499
8
   const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
500
8
   const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
501
8
   const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
502
8
   const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
503
8
   const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
504
8
   const int64_t f0f0    = f0   * static_cast<int64_t>(f0);
505
8
   const int64_t f0f1_2  = f0_2 * static_cast<int64_t>(f1);
506
8
   const int64_t f0f2_2  = f0_2 * static_cast<int64_t>(f2);
507
8
   const int64_t f0f3_2  = f0_2 * static_cast<int64_t>(f3);
508
8
   const int64_t f0f4_2  = f0_2 * static_cast<int64_t>(f4);
509
8
   const int64_t f0f5_2  = f0_2 * static_cast<int64_t>(f5);
510
8
   const int64_t f0f6_2  = f0_2 * static_cast<int64_t>(f6);
511
8
   const int64_t f0f7_2  = f0_2 * static_cast<int64_t>(f7);
512
8
   const int64_t f0f8_2  = f0_2 * static_cast<int64_t>(f8);
513
8
   const int64_t f0f9_2  = f0_2 * static_cast<int64_t>(f9);
514
8
   const int64_t f1f1_2  = f1_2 * static_cast<int64_t>(f1);
515
8
   const int64_t f1f2_2  = f1_2 * static_cast<int64_t>(f2);
516
8
   const int64_t f1f3_4  = f1_2 * static_cast<int64_t>(f3_2);
517
8
   const int64_t f1f4_2  = f1_2 * static_cast<int64_t>(f4);
518
8
   const int64_t f1f5_4  = f1_2 * static_cast<int64_t>(f5_2);
519
8
   const int64_t f1f6_2  = f1_2 * static_cast<int64_t>(f6);
520
8
   const int64_t f1f7_4  = f1_2 * static_cast<int64_t>(f7_2);
521
8
   const int64_t f1f8_2  = f1_2 * static_cast<int64_t>(f8);
522
8
   const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
523
8
   const int64_t f2f2    = f2   * static_cast<int64_t>(f2);
524
8
   const int64_t f2f3_2  = f2_2 * static_cast<int64_t>(f3);
525
8
   const int64_t f2f4_2  = f2_2 * static_cast<int64_t>(f4);
526
8
   const int64_t f2f5_2  = f2_2 * static_cast<int64_t>(f5);
527
8
   const int64_t f2f6_2  = f2_2 * static_cast<int64_t>(f6);
528
8
   const int64_t f2f7_2  = f2_2 * static_cast<int64_t>(f7);
529
8
   const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
530
8
   const int64_t f2f9_38 = f2   * static_cast<int64_t>(f9_38);
531
8
   const int64_t f3f3_2  = f3_2 * static_cast<int64_t>(f3);
532
8
   const int64_t f3f4_2  = f3_2 * static_cast<int64_t>(f4);
533
8
   const int64_t f3f5_4  = f3_2 * static_cast<int64_t>(f5_2);
534
8
   const int64_t f3f6_2  = f3_2 * static_cast<int64_t>(f6);
535
8
   const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
536
8
   const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
537
8
   const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
538
8
   const int64_t f4f4    = f4   * static_cast<int64_t>(f4);
539
8
   const int64_t f4f5_2  = f4_2 * static_cast<int64_t>(f5);
540
8
   const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
541
8
   const int64_t f4f7_38 = f4   * static_cast<int64_t>(f7_38);
542
8
   const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
543
8
   const int64_t f4f9_38 = f4   * static_cast<int64_t>(f9_38);
544
8
   const int64_t f5f5_38 = f5   * static_cast<int64_t>(f5_38);
545
8
   const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
546
8
   const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
547
8
   const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
548
8
   const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
549
8
   const int64_t f6f6_19 = f6   * static_cast<int64_t>(f6_19);
550
8
   const int64_t f6f7_38 = f6   * static_cast<int64_t>(f7_38);
551
8
   const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
552
8
   const int64_t f6f9_38 = f6   * static_cast<int64_t>(f9_38);
553
8
   const int64_t f7f7_38 = f7   * static_cast<int64_t>(f7_38);
554
8
   const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
555
8
   const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
556
8
   const int64_t f8f8_19 = f8   * static_cast<int64_t>(f8_19);
557
8
   const int64_t f8f9_38 = f8   * static_cast<int64_t>(f9_38);
558
8
   const int64_t f9f9_38 = f9   * static_cast<int64_t>(f9_38);
559
560
8
   int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
561
8
   int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
562
8
   int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
563
8
   int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
564
8
   int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
565
8
   int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
566
8
   int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
567
8
   int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
568
8
   int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
569
8
   int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
570
571
8
   h0 += h0;
572
8
   h1 += h1;
573
8
   h2 += h2;
574
8
   h3 += h3;
575
8
   h4 += h4;
576
8
   h5 += h5;
577
8
   h6 += h6;
578
8
   h7 += h7;
579
8
   h8 += h8;
580
8
   h9 += h9;
581
582
8
   carry<26>(h0, h1);
583
8
   carry<26>(h4, h5);
584
585
8
   carry<25>(h1, h2);
586
8
   carry<25>(h5, h6);
587
588
8
   carry<26>(h2, h3);
589
8
   carry<26>(h6, h7);
590
591
8
   carry<25>(h3, h4);
592
8
   carry<25>(h7, h8);
593
8
   carry<26>(h4, h5);
594
8
   carry<26>(h8, h9);
595
8
   carry<25,19>(h9, h0);
596
8
   carry<26>(h0, h1);
597
598
8
   return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
599
8
   }
600
601
/*
602
Ignores top bit of h.
603
*/
604
605
void FE_25519::from_bytes(const uint8_t s[32])
606
0
   {
607
0
   int64_t h0 = load_4(s);
608
0
   int64_t h1 = load_3(s + 4) << 6;
609
0
   int64_t h2 = load_3(s + 7) << 5;
610
0
   int64_t h3 = load_3(s + 10) << 3;
611
0
   int64_t h4 = load_3(s + 13) << 2;
612
0
   int64_t h5 = load_4(s + 16);
613
0
   int64_t h6 = load_3(s + 20) << 7;
614
0
   int64_t h7 = load_3(s + 23) << 5;
615
0
   int64_t h8 = load_3(s + 26) << 4;
616
0
   int64_t h9 = (load_3(s + 29) & 0x7fffff) << 2;
617
618
0
   carry<25,19>(h9, h0);
619
0
   carry<25>(h1, h2);
620
0
   carry<25>(h3, h4);
621
0
   carry<25>(h5, h6);
622
0
   carry<25>(h7, h8);
623
624
0
   carry<26>(h0, h1);
625
0
   carry<26>(h2, h3);
626
0
   carry<26>(h4, h5);
627
0
   carry<26>(h6, h7);
628
0
   carry<26>(h8, h9);
629
630
0
   m_fe[0] = static_cast<int32_t>(h0);
631
0
   m_fe[1] = static_cast<int32_t>(h1);
632
0
   m_fe[2] = static_cast<int32_t>(h2);
633
0
   m_fe[3] = static_cast<int32_t>(h3);
634
0
   m_fe[4] = static_cast<int32_t>(h4);
635
0
   m_fe[5] = static_cast<int32_t>(h5);
636
0
   m_fe[6] = static_cast<int32_t>(h6);
637
0
   m_fe[7] = static_cast<int32_t>(h7);
638
0
   m_fe[8] = static_cast<int32_t>(h8);
639
0
   m_fe[9] = static_cast<int32_t>(h9);
640
0
   }
641
642
/*
643
Preconditions:
644
|h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
645
646
Write p=2^255-19; q=floor(h/p).
647
Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
648
649
Proof:
650
Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
651
Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
652
653
Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
654
Then 0<y<1.
655
656
Write r=h-pq.
657
Have 0<=r<=p-1=2^255-20.
658
Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
659
660
Write x=r+19(2^-255)r+y.
661
Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
662
663
Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
664
so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
665
*/
666
667
void FE_25519::to_bytes(uint8_t s[32]) const
668
4
   {
669
4
   const int64_t X25 = (1 << 25);
670
671
4
   int32_t h0 = m_fe[0];
672
4
   int32_t h1 = m_fe[1];
673
4
   int32_t h2 = m_fe[2];
674
4
   int32_t h3 = m_fe[3];
675
4
   int32_t h4 = m_fe[4];
676
4
   int32_t h5 = m_fe[5];
677
4
   int32_t h6 = m_fe[6];
678
4
   int32_t h7 = m_fe[7];
679
4
   int32_t h8 = m_fe[8];
680
4
   int32_t h9 = m_fe[9];
681
4
   int32_t q;
682
683
4
   q = (19 * h9 + ((static_cast<int32_t>(1) << 24))) >> 25;
684
4
   q = (h0 + q) >> 26;
685
4
   q = (h1 + q) >> 25;
686
4
   q = (h2 + q) >> 26;
687
4
   q = (h3 + q) >> 25;
688
4
   q = (h4 + q) >> 26;
689
4
   q = (h5 + q) >> 25;
690
4
   q = (h6 + q) >> 26;
691
4
   q = (h7 + q) >> 25;
692
4
   q = (h8 + q) >> 26;
693
4
   q = (h9 + q) >> 25;
694
695
   /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
696
4
   h0 += 19 * q;
697
   /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
698
699
4
   carry0<26>(h0, h1);
700
4
   carry0<25>(h1, h2);
701
4
   carry0<26>(h2, h3);
702
4
   carry0<25>(h3, h4);
703
4
   carry0<26>(h4, h5);
704
4
   carry0<25>(h5, h6);
705
4
   carry0<26>(h6, h7);
706
4
   carry0<25>(h7, h8);
707
4
   carry0<26>(h8, h9);
708
709
4
   int32_t carry9 = h9 >> 25;
710
4
   h9 -= carry9 * X25;
711
   /* h10 = carry9 */
712
713
   /*
714
   Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
715
   Have h0+...+2^230 h9 between 0 and 2^255-1;
716
   evidently 2^255 h10-2^255 q = 0.
717
   Goal: Output h0+...+2^230 h9.
718
   */
719
720
4
   s[0] = static_cast<uint8_t>(h0 >> 0);
721
4
   s[1] = static_cast<uint8_t>(h0 >> 8);
722
4
   s[2] = static_cast<uint8_t>(h0 >> 16);
723
4
   s[3] = static_cast<uint8_t>((h0 >> 24) | (h1 << 2));
724
4
   s[4] = static_cast<uint8_t>(h1 >> 6);
725
4
   s[5] = static_cast<uint8_t>(h1 >> 14);
726
4
   s[6] = static_cast<uint8_t>((h1 >> 22) | (h2 << 3));
727
4
   s[7] = static_cast<uint8_t>(h2 >> 5);
728
4
   s[8] = static_cast<uint8_t>(h2 >> 13);
729
4
   s[9] = static_cast<uint8_t>((h2 >> 21) | (h3 << 5));
730
4
   s[10] = static_cast<uint8_t>(h3 >> 3);
731
4
   s[11] = static_cast<uint8_t>(h3 >> 11);
732
4
   s[12] = static_cast<uint8_t>((h3 >> 19) | (h4 << 6));
733
4
   s[13] = static_cast<uint8_t>(h4 >> 2);
734
4
   s[14] = static_cast<uint8_t>(h4 >> 10);
735
4
   s[15] = static_cast<uint8_t>(h4 >> 18);
736
4
   s[16] = static_cast<uint8_t>(h5 >> 0);
737
4
   s[17] = static_cast<uint8_t>(h5 >> 8);
738
4
   s[18] = static_cast<uint8_t>(h5 >> 16);
739
4
   s[19] = static_cast<uint8_t>((h5 >> 24) | (h6 << 1));
740
4
   s[20] = static_cast<uint8_t>(h6 >> 7);
741
4
   s[21] = static_cast<uint8_t>(h6 >> 15);
742
4
   s[22] = static_cast<uint8_t>((h6 >> 23) | (h7 << 3));
743
4
   s[23] = static_cast<uint8_t>(h7 >> 5);
744
4
   s[24] = static_cast<uint8_t>(h7 >> 13);
745
4
   s[25] = static_cast<uint8_t>((h7 >> 21) | (h8 << 4));
746
4
   s[26] = static_cast<uint8_t>(h8 >> 4);
747
4
   s[27] = static_cast<uint8_t>(h8 >> 12);
748
4
   s[28] = static_cast<uint8_t>((h8 >> 20) | (h9 << 6));
749
4
   s[29] = static_cast<uint8_t>(h9 >> 2);
750
4
   s[30] = static_cast<uint8_t>(h9 >> 10);
751
4
   s[31] = static_cast<uint8_t>(h9 >> 18);
752
4
   }
753
754
}