Coverage Report

Created: 2021-10-13 08:49

/src/botan/src/lib/pubkey/ed25519/ed25519_fe.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Ed25519 field element
3
* (C) 2017 Ribose Inc
4
*
5
* Based on the public domain code from SUPERCOP ref10 by
6
* Peter Schwabe, Daniel J. Bernstein, Niels Duif, Tanja Lange, Bo-Yin Yang
7
*
8
* Botan is released under the Simplified BSD License (see license.txt)
9
*/
10
11
#include <botan/internal/ed25519_fe.h>
12
#include <botan/internal/ed25519_internal.h>
13
14
namespace Botan {
15
16
//static
17
FE_25519 FE_25519::invert(const FE_25519& z)
18
5
   {
19
5
   fe t0;
20
5
   fe t1;
21
5
   fe t2;
22
5
   fe t3;
23
24
5
   fe_sq(t0, z);
25
5
   fe_sq_iter(t1, t0, 2);
26
5
   fe_mul(t1, z, t1);
27
5
   fe_mul(t0, t0, t1);
28
5
   fe_sq(t2, t0);
29
5
   fe_mul(t1, t1, t2);
30
5
   fe_sq_iter(t2, t1, 5);
31
5
   fe_mul(t1, t2, t1);
32
5
   fe_sq_iter(t2, t1, 10);
33
5
   fe_mul(t2, t2, t1);
34
5
   fe_sq_iter(t3, t2, 20);
35
5
   fe_mul(t2, t3, t2);
36
5
   fe_sq_iter(t2, t2, 10);
37
5
   fe_mul(t1, t2, t1);
38
5
   fe_sq_iter(t2, t1, 50);
39
5
   fe_mul(t2, t2, t1);
40
5
   fe_sq_iter(t3, t2, 100);
41
5
   fe_mul(t2, t3, t2);
42
5
   fe_sq_iter(t2, t2, 50);
43
5
   fe_mul(t1, t2, t1);
44
5
   fe_sq_iter(t1, t1, 5);
45
46
5
   fe_mul(t0, t1, t0);
47
5
   return t0;
48
5
   }
49
50
FE_25519 FE_25519::pow_22523(const fe& z)
51
0
   {
52
0
   fe t0;
53
0
   fe t1;
54
0
   fe t2;
55
56
0
   fe_sq(t0, z);
57
0
   fe_sq_iter(t1, t0, 2);
58
0
   fe_mul(t1, z, t1);
59
0
   fe_mul(t0, t0, t1);
60
0
   fe_sq(t0, t0);
61
0
   fe_mul(t0, t1, t0);
62
0
   fe_sq_iter(t1, t0, 5);
63
0
   fe_mul(t0, t1, t0);
64
0
   fe_sq_iter(t1, t0, 10);
65
0
   fe_mul(t1, t1, t0);
66
0
   fe_sq_iter(t2, t1, 20);
67
0
   fe_mul(t1, t2, t1);
68
0
   fe_sq_iter(t1, t1, 10);
69
0
   fe_mul(t0, t1, t0);
70
0
   fe_sq_iter(t1, t0, 50);
71
0
   fe_mul(t1, t1, t0);
72
0
   fe_sq_iter(t2, t1, 100);
73
0
   fe_mul(t1, t2, t1);
74
0
   fe_sq_iter(t1, t1, 50);
75
0
   fe_mul(t0, t1, t0);
76
0
   fe_sq_iter(t0, t0, 2);
77
78
0
   fe_mul(t0, t0, z);
79
0
   return t0;
80
0
   }
81
82
/*
83
h = f * g
84
Can overlap h with f or g.
85
86
Preconditions:
87
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
88
|g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
89
90
Postconditions:
91
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
92
*/
93
94
/*
95
Notes on implementation strategy:
96
97
Using schoolbook multiplication.
98
Karatsuba would save a little in some cost models.
99
100
Most multiplications by 2 and 19 are 32-bit precomputations;
101
cheaper than 64-bit postcomputations.
102
103
There is one remaining multiplication by 19 in the carry chain;
104
one *19 precomputation can be merged into this,
105
but the resulting data flow is considerably less clean.
106
107
There are 12 carries below.
108
10 of them are 2-way parallelizable and vectorizable.
109
Can get away with 11 carries, but then data flow is much deeper.
110
111
With tighter constraints on inputs can squeeze carries into int32.
112
*/
113
114
//static
115
FE_25519 FE_25519::mul(const FE_25519& f, const FE_25519& g)
116
2.37k
   {
117
2.37k
   const int32_t f0 = f[0];
118
2.37k
   const int32_t f1 = f[1];
119
2.37k
   const int32_t f2 = f[2];
120
2.37k
   const int32_t f3 = f[3];
121
2.37k
   const int32_t f4 = f[4];
122
2.37k
   const int32_t f5 = f[5];
123
2.37k
   const int32_t f6 = f[6];
124
2.37k
   const int32_t f7 = f[7];
125
2.37k
   const int32_t f8 = f[8];
126
2.37k
   const int32_t f9 = f[9];
127
128
2.37k
   const int32_t g0 = g[0];
129
2.37k
   const int32_t g1 = g[1];
130
2.37k
   const int32_t g2 = g[2];
131
2.37k
   const int32_t g3 = g[3];
132
2.37k
   const int32_t g4 = g[4];
133
2.37k
   const int32_t g5 = g[5];
134
2.37k
   const int32_t g6 = g[6];
135
2.37k
   const int32_t g7 = g[7];
136
2.37k
   const int32_t g8 = g[8];
137
2.37k
   const int32_t g9 = g[9];
138
139
2.37k
   const int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
140
2.37k
   const int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
141
2.37k
   const int32_t g3_19 = 19 * g3;
142
2.37k
   const int32_t g4_19 = 19 * g4;
143
2.37k
   const int32_t g5_19 = 19 * g5;
144
2.37k
   const int32_t g6_19 = 19 * g6;
145
2.37k
   const int32_t g7_19 = 19 * g7;
146
2.37k
   const int32_t g8_19 = 19 * g8;
147
2.37k
   const int32_t g9_19 = 19 * g9;
148
2.37k
   const int32_t f1_2 = 2 * f1;
149
2.37k
   const int32_t f3_2 = 2 * f3;
150
2.37k
   const int32_t f5_2 = 2 * f5;
151
2.37k
   const int32_t f7_2 = 2 * f7;
152
2.37k
   const int32_t f9_2 = 2 * f9;
153
154
2.37k
   const int64_t f0g0    = f0   * static_cast<int64_t>(g0);
155
2.37k
   const int64_t f0g1    = f0   * static_cast<int64_t>(g1);
156
2.37k
   const int64_t f0g2    = f0   * static_cast<int64_t>(g2);
157
2.37k
   const int64_t f0g3    = f0   * static_cast<int64_t>(g3);
158
2.37k
   const int64_t f0g4    = f0   * static_cast<int64_t>(g4);
159
2.37k
   const int64_t f0g5    = f0   * static_cast<int64_t>(g5);
160
2.37k
   const int64_t f0g6    = f0   * static_cast<int64_t>(g6);
161
2.37k
   const int64_t f0g7    = f0   * static_cast<int64_t>(g7);
162
2.37k
   const int64_t f0g8    = f0   * static_cast<int64_t>(g8);
163
2.37k
   const int64_t f0g9    = f0   * static_cast<int64_t>(g9);
164
2.37k
   const int64_t f1g0    = f1   * static_cast<int64_t>(g0);
165
2.37k
   const int64_t f1g1_2  = f1_2 * static_cast<int64_t>(g1);
166
2.37k
   const int64_t f1g2    = f1   * static_cast<int64_t>(g2);
167
2.37k
   const int64_t f1g3_2  = f1_2 * static_cast<int64_t>(g3);
168
2.37k
   const int64_t f1g4    = f1   * static_cast<int64_t>(g4);
169
2.37k
   const int64_t f1g5_2  = f1_2 * static_cast<int64_t>(g5);
170
2.37k
   const int64_t f1g6    = f1   * static_cast<int64_t>(g6);
171
2.37k
   const int64_t f1g7_2  = f1_2 * static_cast<int64_t>(g7);
172
2.37k
   const int64_t f1g8    = f1   * static_cast<int64_t>(g8);
173
2.37k
   const int64_t f1g9_38 = f1_2 * static_cast<int64_t>(g9_19);
174
2.37k
   const int64_t f2g0    = f2   * static_cast<int64_t>(g0);
175
2.37k
   const int64_t f2g1    = f2   * static_cast<int64_t>(g1);
176
2.37k
   const int64_t f2g2    = f2   * static_cast<int64_t>(g2);
177
2.37k
   const int64_t f2g3    = f2   * static_cast<int64_t>(g3);
178
2.37k
   const int64_t f2g4    = f2   * static_cast<int64_t>(g4);
179
2.37k
   const int64_t f2g5    = f2   * static_cast<int64_t>(g5);
180
2.37k
   const int64_t f2g6    = f2   * static_cast<int64_t>(g6);
181
2.37k
   const int64_t f2g7    = f2   * static_cast<int64_t>(g7);
182
2.37k
   const int64_t f2g8_19 = f2   * static_cast<int64_t>(g8_19);
183
2.37k
   const int64_t f2g9_19 = f2   * static_cast<int64_t>(g9_19);
184
2.37k
   const int64_t f3g0    = f3   * static_cast<int64_t>(g0);
185
2.37k
   const int64_t f3g1_2  = f3_2 * static_cast<int64_t>(g1);
186
2.37k
   const int64_t f3g2    = f3   * static_cast<int64_t>(g2);
187
2.37k
   const int64_t f3g3_2  = f3_2 * static_cast<int64_t>(g3);
188
2.37k
   const int64_t f3g4    = f3   * static_cast<int64_t>(g4);
189
2.37k
   const int64_t f3g5_2  = f3_2 * static_cast<int64_t>(g5);
190
2.37k
   const int64_t f3g6    = f3   * static_cast<int64_t>(g6);
191
2.37k
   const int64_t f3g7_38 = f3_2 * static_cast<int64_t>(g7_19);
192
2.37k
   const int64_t f3g8_19 = f3   * static_cast<int64_t>(g8_19);
193
2.37k
   const int64_t f3g9_38 = f3_2 * static_cast<int64_t>(g9_19);
194
2.37k
   const int64_t f4g0    = f4   * static_cast<int64_t>(g0);
195
2.37k
   const int64_t f4g1    = f4   * static_cast<int64_t>(g1);
196
2.37k
   const int64_t f4g2    = f4   * static_cast<int64_t>(g2);
197
2.37k
   const int64_t f4g3    = f4   * static_cast<int64_t>(g3);
198
2.37k
   const int64_t f4g4    = f4   * static_cast<int64_t>(g4);
199
2.37k
   const int64_t f4g5    = f4   * static_cast<int64_t>(g5);
200
2.37k
   const int64_t f4g6_19 = f4   * static_cast<int64_t>(g6_19);
201
2.37k
   const int64_t f4g7_19 = f4   * static_cast<int64_t>(g7_19);
202
2.37k
   const int64_t f4g8_19 = f4   * static_cast<int64_t>(g8_19);
203
2.37k
   const int64_t f4g9_19 = f4   * static_cast<int64_t>(g9_19);
204
2.37k
   const int64_t f5g0    = f5   * static_cast<int64_t>(g0);
205
2.37k
   const int64_t f5g1_2  = f5_2 * static_cast<int64_t>(g1);
206
2.37k
   const int64_t f5g2    = f5   * static_cast<int64_t>(g2);
207
2.37k
   const int64_t f5g3_2  = f5_2 * static_cast<int64_t>(g3);
208
2.37k
   const int64_t f5g4    = f5   * static_cast<int64_t>(g4);
209
2.37k
   const int64_t f5g5_38 = f5_2 * static_cast<int64_t>(g5_19);
210
2.37k
   const int64_t f5g6_19 = f5   * static_cast<int64_t>(g6_19);
211
2.37k
   const int64_t f5g7_38 = f5_2 * static_cast<int64_t>(g7_19);
212
2.37k
   const int64_t f5g8_19 = f5   * static_cast<int64_t>(g8_19);
213
2.37k
   const int64_t f5g9_38 = f5_2 * static_cast<int64_t>(g9_19);
214
2.37k
   const int64_t f6g0    = f6   * static_cast<int64_t>(g0);
215
2.37k
   const int64_t f6g1    = f6   * static_cast<int64_t>(g1);
216
2.37k
   const int64_t f6g2    = f6   * static_cast<int64_t>(g2);
217
2.37k
   const int64_t f6g3    = f6   * static_cast<int64_t>(g3);
218
2.37k
   const int64_t f6g4_19 = f6   * static_cast<int64_t>(g4_19);
219
2.37k
   const int64_t f6g5_19 = f6   * static_cast<int64_t>(g5_19);
220
2.37k
   const int64_t f6g6_19 = f6   * static_cast<int64_t>(g6_19);
221
2.37k
   const int64_t f6g7_19 = f6   * static_cast<int64_t>(g7_19);
222
2.37k
   const int64_t f6g8_19 = f6   * static_cast<int64_t>(g8_19);
223
2.37k
   const int64_t f6g9_19 = f6   * static_cast<int64_t>(g9_19);
224
2.37k
   const int64_t f7g0    = f7   * static_cast<int64_t>(g0);
225
2.37k
   const int64_t f7g1_2  = f7_2 * static_cast<int64_t>(g1);
226
2.37k
   const int64_t f7g2    = f7   * static_cast<int64_t>(g2);
227
2.37k
   const int64_t f7g3_38 = f7_2 * static_cast<int64_t>(g3_19);
228
2.37k
   const int64_t f7g4_19 = f7   * static_cast<int64_t>(g4_19);
229
2.37k
   const int64_t f7g5_38 = f7_2 * static_cast<int64_t>(g5_19);
230
2.37k
   const int64_t f7g6_19 = f7   * static_cast<int64_t>(g6_19);
231
2.37k
   const int64_t f7g7_38 = f7_2 * static_cast<int64_t>(g7_19);
232
2.37k
   const int64_t f7g8_19 = f7   * static_cast<int64_t>(g8_19);
233
2.37k
   const int64_t f7g9_38 = f7_2 * static_cast<int64_t>(g9_19);
234
2.37k
   const int64_t f8g0    = f8   * static_cast<int64_t>(g0);
235
2.37k
   const int64_t f8g1    = f8   * static_cast<int64_t>(g1);
236
2.37k
   const int64_t f8g2_19 = f8   * static_cast<int64_t>(g2_19);
237
2.37k
   const int64_t f8g3_19 = f8   * static_cast<int64_t>(g3_19);
238
2.37k
   const int64_t f8g4_19 = f8   * static_cast<int64_t>(g4_19);
239
2.37k
   const int64_t f8g5_19 = f8   * static_cast<int64_t>(g5_19);
240
2.37k
   const int64_t f8g6_19 = f8   * static_cast<int64_t>(g6_19);
241
2.37k
   const int64_t f8g7_19 = f8   * static_cast<int64_t>(g7_19);
242
2.37k
   const int64_t f8g8_19 = f8   * static_cast<int64_t>(g8_19);
243
2.37k
   const int64_t f8g9_19 = f8   * static_cast<int64_t>(g9_19);
244
2.37k
   const int64_t f9g0    = f9   * static_cast<int64_t>(g0);
245
2.37k
   const int64_t f9g1_38 = f9_2 * static_cast<int64_t>(g1_19);
246
2.37k
   const int64_t f9g2_19 = f9   * static_cast<int64_t>(g2_19);
247
2.37k
   const int64_t f9g3_38 = f9_2 * static_cast<int64_t>(g3_19);
248
2.37k
   const int64_t f9g4_19 = f9   * static_cast<int64_t>(g4_19);
249
2.37k
   const int64_t f9g5_38 = f9_2 * static_cast<int64_t>(g5_19);
250
2.37k
   const int64_t f9g6_19 = f9   * static_cast<int64_t>(g6_19);
251
2.37k
   const int64_t f9g7_38 = f9_2 * static_cast<int64_t>(g7_19);
252
2.37k
   const int64_t f9g8_19 = f9   * static_cast<int64_t>(g8_19);
253
2.37k
   const int64_t f9g9_38 = f9_2 * static_cast<int64_t>(g9_19);
254
255
2.37k
   int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
256
2.37k
   int64_t h1 = f0g1+f1g0   +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
257
2.37k
   int64_t h2 = f0g2+f1g1_2 +f2g0   +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
258
2.37k
   int64_t h3 = f0g3+f1g2   +f2g1   +f3g0   +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
259
2.37k
   int64_t h4 = f0g4+f1g3_2 +f2g2   +f3g1_2 +f4g0   +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
260
2.37k
   int64_t h5 = f0g5+f1g4   +f2g3   +f3g2   +f4g1   +f5g0   +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
261
2.37k
   int64_t h6 = f0g6+f1g5_2 +f2g4   +f3g3_2 +f4g2   +f5g1_2 +f6g0   +f7g9_38+f8g8_19+f9g7_38;
262
2.37k
   int64_t h7 = f0g7+f1g6   +f2g5   +f3g4   +f4g3   +f5g2   +f6g1   +f7g0   +f8g9_19+f9g8_19;
263
2.37k
   int64_t h8 = f0g8+f1g7_2 +f2g6   +f3g5_2 +f4g4   +f5g3_2 +f6g2   +f7g1_2 +f8g0   +f9g9_38;
264
2.37k
   int64_t h9 = f0g9+f1g8   +f2g7   +f3g6   +f4g5   +f5g4   +f6g3   +f7g2   +f8g1   +f9g0   ;
265
266
   /*
267
   |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
268
   i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
269
   |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
270
   i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
271
   */
272
2.37k
   carry<26>(h0, h1);
273
2.37k
   carry<26>(h4, h5);
274
275
   /* |h0| <= 2^25 */
276
   /* |h4| <= 2^25 */
277
   /* |h1| <= 1.71*2^59 */
278
   /* |h5| <= 1.71*2^59 */
279
280
2.37k
   carry<25>(h1, h2);
281
2.37k
   carry<25>(h5, h6);
282
283
   /* |h1| <= 2^24; from now on fits into int32 */
284
   /* |h5| <= 2^24; from now on fits into int32 */
285
   /* |h2| <= 1.41*2^60 */
286
   /* |h6| <= 1.41*2^60 */
287
288
2.37k
   carry<26>(h2, h3);
289
2.37k
   carry<26>(h6, h7);
290
   /* |h2| <= 2^25; from now on fits into int32 unchanged */
291
   /* |h6| <= 2^25; from now on fits into int32 unchanged */
292
   /* |h3| <= 1.71*2^59 */
293
   /* |h7| <= 1.71*2^59 */
294
295
2.37k
   carry<25>(h3, h4);
296
2.37k
   carry<25>(h7, h8);
297
   /* |h3| <= 2^24; from now on fits into int32 unchanged */
298
   /* |h7| <= 2^24; from now on fits into int32 unchanged */
299
   /* |h4| <= 1.72*2^34 */
300
   /* |h8| <= 1.41*2^60 */
301
302
2.37k
   carry<26>(h4, h5);
303
2.37k
   carry<26>(h8, h9);
304
   /* |h4| <= 2^25; from now on fits into int32 unchanged */
305
   /* |h8| <= 2^25; from now on fits into int32 unchanged */
306
   /* |h5| <= 1.01*2^24 */
307
   /* |h9| <= 1.71*2^59 */
308
309
2.37k
   carry<25, 19>(h9, h0);
310
311
   /* |h9| <= 2^24; from now on fits into int32 unchanged */
312
   /* |h0| <= 1.1*2^39 */
313
314
2.37k
   carry<26>(h0, h1);
315
   /* |h0| <= 2^25; from now on fits into int32 unchanged */
316
   /* |h1| <= 1.01*2^24 */
317
318
2.37k
   return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
319
2.37k
   }
320
321
/*
322
h = f * f
323
Can overlap h with f.
324
325
Preconditions:
326
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
327
328
Postconditions:
329
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
330
*/
331
332
/*
333
See fe_mul.c for discussion of implementation strategy.
334
*/
335
336
//static
337
FE_25519 FE_25519::sqr_iter(const FE_25519& f, size_t iter)
338
115
   {
339
115
   int32_t f0 = f[0];
340
115
   int32_t f1 = f[1];
341
115
   int32_t f2 = f[2];
342
115
   int32_t f3 = f[3];
343
115
   int32_t f4 = f[4];
344
115
   int32_t f5 = f[5];
345
115
   int32_t f6 = f[6];
346
115
   int32_t f7 = f[7];
347
115
   int32_t f8 = f[8];
348
115
   int32_t f9 = f[9];
349
350
1.44k
   for(size_t i = 0; i != iter; ++i)
351
1.33k
      {
352
1.33k
      const int32_t f0_2 = 2 * f0;
353
1.33k
      const int32_t f1_2 = 2 * f1;
354
1.33k
      const int32_t f2_2 = 2 * f2;
355
1.33k
      const int32_t f3_2 = 2 * f3;
356
1.33k
      const int32_t f4_2 = 2 * f4;
357
1.33k
      const int32_t f5_2 = 2 * f5;
358
1.33k
      const int32_t f6_2 = 2 * f6;
359
1.33k
      const int32_t f7_2 = 2 * f7;
360
1.33k
      const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
361
1.33k
      const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
362
1.33k
      const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
363
1.33k
      const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
364
1.33k
      const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
365
366
1.33k
      const int64_t f0f0    = f0   * static_cast<int64_t>(f0);
367
1.33k
      const int64_t f0f1_2  = f0_2 * static_cast<int64_t>(f1);
368
1.33k
      const int64_t f0f2_2  = f0_2 * static_cast<int64_t>(f2);
369
1.33k
      const int64_t f0f3_2  = f0_2 * static_cast<int64_t>(f3);
370
1.33k
      const int64_t f0f4_2  = f0_2 * static_cast<int64_t>(f4);
371
1.33k
      const int64_t f0f5_2  = f0_2 * static_cast<int64_t>(f5);
372
1.33k
      const int64_t f0f6_2  = f0_2 * static_cast<int64_t>(f6);
373
1.33k
      const int64_t f0f7_2  = f0_2 * static_cast<int64_t>(f7);
374
1.33k
      const int64_t f0f8_2  = f0_2 * static_cast<int64_t>(f8);
375
1.33k
      const int64_t f0f9_2  = f0_2 * static_cast<int64_t>(f9);
376
1.33k
      const int64_t f1f1_2  = f1_2 * static_cast<int64_t>(f1);
377
1.33k
      const int64_t f1f2_2  = f1_2 * static_cast<int64_t>(f2);
378
1.33k
      const int64_t f1f3_4  = f1_2 * static_cast<int64_t>(f3_2);
379
1.33k
      const int64_t f1f4_2  = f1_2 * static_cast<int64_t>(f4);
380
1.33k
      const int64_t f1f5_4  = f1_2 * static_cast<int64_t>(f5_2);
381
1.33k
      const int64_t f1f6_2  = f1_2 * static_cast<int64_t>(f6);
382
1.33k
      const int64_t f1f7_4  = f1_2 * static_cast<int64_t>(f7_2);
383
1.33k
      const int64_t f1f8_2  = f1_2 * static_cast<int64_t>(f8);
384
1.33k
      const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
385
1.33k
      const int64_t f2f2    = f2   * static_cast<int64_t>(f2);
386
1.33k
      const int64_t f2f3_2  = f2_2 * static_cast<int64_t>(f3);
387
1.33k
      const int64_t f2f4_2  = f2_2 * static_cast<int64_t>(f4);
388
1.33k
      const int64_t f2f5_2  = f2_2 * static_cast<int64_t>(f5);
389
1.33k
      const int64_t f2f6_2  = f2_2 * static_cast<int64_t>(f6);
390
1.33k
      const int64_t f2f7_2  = f2_2 * static_cast<int64_t>(f7);
391
1.33k
      const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
392
1.33k
      const int64_t f2f9_38 = f2   * static_cast<int64_t>(f9_38);
393
1.33k
      const int64_t f3f3_2  = f3_2 * static_cast<int64_t>(f3);
394
1.33k
      const int64_t f3f4_2  = f3_2 * static_cast<int64_t>(f4);
395
1.33k
      const int64_t f3f5_4  = f3_2 * static_cast<int64_t>(f5_2);
396
1.33k
      const int64_t f3f6_2  = f3_2 * static_cast<int64_t>(f6);
397
1.33k
      const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
398
1.33k
      const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
399
1.33k
      const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
400
1.33k
      const int64_t f4f4    = f4   * static_cast<int64_t>(f4);
401
1.33k
      const int64_t f4f5_2  = f4_2 * static_cast<int64_t>(f5);
402
1.33k
      const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
403
1.33k
      const int64_t f4f7_38 = f4   * static_cast<int64_t>(f7_38);
404
1.33k
      const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
405
1.33k
      const int64_t f4f9_38 = f4   * static_cast<int64_t>(f9_38);
406
1.33k
      const int64_t f5f5_38 = f5   * static_cast<int64_t>(f5_38);
407
1.33k
      const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
408
1.33k
      const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
409
1.33k
      const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
410
1.33k
      const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
411
1.33k
      const int64_t f6f6_19 = f6   * static_cast<int64_t>(f6_19);
412
1.33k
      const int64_t f6f7_38 = f6   * static_cast<int64_t>(f7_38);
413
1.33k
      const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
414
1.33k
      const int64_t f6f9_38 = f6   * static_cast<int64_t>(f9_38);
415
1.33k
      const int64_t f7f7_38 = f7   * static_cast<int64_t>(f7_38);
416
1.33k
      const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
417
1.33k
      const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
418
1.33k
      const int64_t f8f8_19 = f8   * static_cast<int64_t>(f8_19);
419
1.33k
      const int64_t f8f9_38 = f8   * static_cast<int64_t>(f9_38);
420
1.33k
      const int64_t f9f9_38 = f9   * static_cast<int64_t>(f9_38);
421
422
1.33k
      int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
423
1.33k
      int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
424
1.33k
      int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
425
1.33k
      int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
426
1.33k
      int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
427
1.33k
      int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
428
1.33k
      int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
429
1.33k
      int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
430
1.33k
      int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
431
1.33k
      int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
432
433
1.33k
      carry<26>(h0, h1);
434
1.33k
      carry<26>(h4, h5);
435
1.33k
      carry<25>(h1, h2);
436
1.33k
      carry<25>(h5, h6);
437
1.33k
      carry<26>(h2, h3);
438
1.33k
      carry<26>(h6, h7);
439
440
1.33k
      carry<25>(h3, h4);
441
1.33k
      carry<25>(h7, h8);
442
443
1.33k
      carry<26>(h4, h5);
444
1.33k
      carry<26>(h8, h9);
445
1.33k
      carry<25,19>(h9, h0);
446
1.33k
      carry<26>(h0, h1);
447
448
1.33k
      f0 = static_cast<int32_t>(h0);
449
1.33k
      f1 = static_cast<int32_t>(h1);
450
1.33k
      f2 = static_cast<int32_t>(h2);
451
1.33k
      f3 = static_cast<int32_t>(h3);
452
1.33k
      f4 = static_cast<int32_t>(h4);
453
1.33k
      f5 = static_cast<int32_t>(h5);
454
1.33k
      f6 = static_cast<int32_t>(h6);
455
1.33k
      f7 = static_cast<int32_t>(h7);
456
1.33k
      f8 = static_cast<int32_t>(h8);
457
1.33k
      f9 = static_cast<int32_t>(h9);
458
1.33k
      }
459
460
115
   return FE_25519(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9);
461
115
   }
462
463
/*
464
h = 2 * f * f
465
Can overlap h with f.
466
467
Preconditions:
468
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
469
470
Postconditions:
471
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
472
*/
473
474
/*
475
See fe_mul.c for discussion of implementation strategy.
476
*/
477
478
//static
479
FE_25519 FE_25519::sqr2(const FE_25519& f)
480
20
   {
481
20
   const int32_t f0 = f[0];
482
20
   const int32_t f1 = f[1];
483
20
   const int32_t f2 = f[2];
484
20
   const int32_t f3 = f[3];
485
20
   const int32_t f4 = f[4];
486
20
   const int32_t f5 = f[5];
487
20
   const int32_t f6 = f[6];
488
20
   const int32_t f7 = f[7];
489
20
   const int32_t f8 = f[8];
490
20
   const int32_t f9 = f[9];
491
20
   const int32_t f0_2 = 2 * f0;
492
20
   const int32_t f1_2 = 2 * f1;
493
20
   const int32_t f2_2 = 2 * f2;
494
20
   const int32_t f3_2 = 2 * f3;
495
20
   const int32_t f4_2 = 2 * f4;
496
20
   const int32_t f5_2 = 2 * f5;
497
20
   const int32_t f6_2 = 2 * f6;
498
20
   const int32_t f7_2 = 2 * f7;
499
20
   const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
500
20
   const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
501
20
   const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
502
20
   const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
503
20
   const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
504
20
   const int64_t f0f0    = f0   * static_cast<int64_t>(f0);
505
20
   const int64_t f0f1_2  = f0_2 * static_cast<int64_t>(f1);
506
20
   const int64_t f0f2_2  = f0_2 * static_cast<int64_t>(f2);
507
20
   const int64_t f0f3_2  = f0_2 * static_cast<int64_t>(f3);
508
20
   const int64_t f0f4_2  = f0_2 * static_cast<int64_t>(f4);
509
20
   const int64_t f0f5_2  = f0_2 * static_cast<int64_t>(f5);
510
20
   const int64_t f0f6_2  = f0_2 * static_cast<int64_t>(f6);
511
20
   const int64_t f0f7_2  = f0_2 * static_cast<int64_t>(f7);
512
20
   const int64_t f0f8_2  = f0_2 * static_cast<int64_t>(f8);
513
20
   const int64_t f0f9_2  = f0_2 * static_cast<int64_t>(f9);
514
20
   const int64_t f1f1_2  = f1_2 * static_cast<int64_t>(f1);
515
20
   const int64_t f1f2_2  = f1_2 * static_cast<int64_t>(f2);
516
20
   const int64_t f1f3_4  = f1_2 * static_cast<int64_t>(f3_2);
517
20
   const int64_t f1f4_2  = f1_2 * static_cast<int64_t>(f4);
518
20
   const int64_t f1f5_4  = f1_2 * static_cast<int64_t>(f5_2);
519
20
   const int64_t f1f6_2  = f1_2 * static_cast<int64_t>(f6);
520
20
   const int64_t f1f7_4  = f1_2 * static_cast<int64_t>(f7_2);
521
20
   const int64_t f1f8_2  = f1_2 * static_cast<int64_t>(f8);
522
20
   const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
523
20
   const int64_t f2f2    = f2   * static_cast<int64_t>(f2);
524
20
   const int64_t f2f3_2  = f2_2 * static_cast<int64_t>(f3);
525
20
   const int64_t f2f4_2  = f2_2 * static_cast<int64_t>(f4);
526
20
   const int64_t f2f5_2  = f2_2 * static_cast<int64_t>(f5);
527
20
   const int64_t f2f6_2  = f2_2 * static_cast<int64_t>(f6);
528
20
   const int64_t f2f7_2  = f2_2 * static_cast<int64_t>(f7);
529
20
   const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
530
20
   const int64_t f2f9_38 = f2   * static_cast<int64_t>(f9_38);
531
20
   const int64_t f3f3_2  = f3_2 * static_cast<int64_t>(f3);
532
20
   const int64_t f3f4_2  = f3_2 * static_cast<int64_t>(f4);
533
20
   const int64_t f3f5_4  = f3_2 * static_cast<int64_t>(f5_2);
534
20
   const int64_t f3f6_2  = f3_2 * static_cast<int64_t>(f6);
535
20
   const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
536
20
   const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
537
20
   const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
538
20
   const int64_t f4f4    = f4   * static_cast<int64_t>(f4);
539
20
   const int64_t f4f5_2  = f4_2 * static_cast<int64_t>(f5);
540
20
   const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
541
20
   const int64_t f4f7_38 = f4   * static_cast<int64_t>(f7_38);
542
20
   const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
543
20
   const int64_t f4f9_38 = f4   * static_cast<int64_t>(f9_38);
544
20
   const int64_t f5f5_38 = f5   * static_cast<int64_t>(f5_38);
545
20
   const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
546
20
   const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
547
20
   const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
548
20
   const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
549
20
   const int64_t f6f6_19 = f6   * static_cast<int64_t>(f6_19);
550
20
   const int64_t f6f7_38 = f6   * static_cast<int64_t>(f7_38);
551
20
   const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
552
20
   const int64_t f6f9_38 = f6   * static_cast<int64_t>(f9_38);
553
20
   const int64_t f7f7_38 = f7   * static_cast<int64_t>(f7_38);
554
20
   const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
555
20
   const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
556
20
   const int64_t f8f8_19 = f8   * static_cast<int64_t>(f8_19);
557
20
   const int64_t f8f9_38 = f8   * static_cast<int64_t>(f9_38);
558
20
   const int64_t f9f9_38 = f9   * static_cast<int64_t>(f9_38);
559
560
20
   int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
561
20
   int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
562
20
   int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
563
20
   int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
564
20
   int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
565
20
   int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
566
20
   int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
567
20
   int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
568
20
   int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
569
20
   int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
570
571
20
   h0 += h0;
572
20
   h1 += h1;
573
20
   h2 += h2;
574
20
   h3 += h3;
575
20
   h4 += h4;
576
20
   h5 += h5;
577
20
   h6 += h6;
578
20
   h7 += h7;
579
20
   h8 += h8;
580
20
   h9 += h9;
581
582
20
   carry<26>(h0, h1);
583
20
   carry<26>(h4, h5);
584
585
20
   carry<25>(h1, h2);
586
20
   carry<25>(h5, h6);
587
588
20
   carry<26>(h2, h3);
589
20
   carry<26>(h6, h7);
590
591
20
   carry<25>(h3, h4);
592
20
   carry<25>(h7, h8);
593
20
   carry<26>(h4, h5);
594
20
   carry<26>(h8, h9);
595
20
   carry<25,19>(h9, h0);
596
20
   carry<26>(h0, h1);
597
598
20
   return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
599
20
   }
600
601
/*
602
Ignores top bit of h.
603
*/
604
605
void FE_25519::from_bytes(const uint8_t s[32])
606
0
   {
607
0
   int64_t h0 = load_4(s);
608
0
   int64_t h1 = load_3(s + 4) << 6;
609
0
   int64_t h2 = load_3(s + 7) << 5;
610
0
   int64_t h3 = load_3(s + 10) << 3;
611
0
   int64_t h4 = load_3(s + 13) << 2;
612
0
   int64_t h5 = load_4(s + 16);
613
0
   int64_t h6 = load_3(s + 20) << 7;
614
0
   int64_t h7 = load_3(s + 23) << 5;
615
0
   int64_t h8 = load_3(s + 26) << 4;
616
0
   int64_t h9 = (load_3(s + 29) & 0x7fffff) << 2;
617
618
0
   carry<25,19>(h9, h0);
619
0
   carry<25>(h1, h2);
620
0
   carry<25>(h3, h4);
621
0
   carry<25>(h5, h6);
622
0
   carry<25>(h7, h8);
623
624
0
   carry<26>(h0, h1);
625
0
   carry<26>(h2, h3);
626
0
   carry<26>(h4, h5);
627
0
   carry<26>(h6, h7);
628
0
   carry<26>(h8, h9);
629
630
0
   m_fe[0] = static_cast<int32_t>(h0);
631
0
   m_fe[1] = static_cast<int32_t>(h1);
632
0
   m_fe[2] = static_cast<int32_t>(h2);
633
0
   m_fe[3] = static_cast<int32_t>(h3);
634
0
   m_fe[4] = static_cast<int32_t>(h4);
635
0
   m_fe[5] = static_cast<int32_t>(h5);
636
0
   m_fe[6] = static_cast<int32_t>(h6);
637
0
   m_fe[7] = static_cast<int32_t>(h7);
638
0
   m_fe[8] = static_cast<int32_t>(h8);
639
0
   m_fe[9] = static_cast<int32_t>(h9);
640
0
   }
641
642
/*
643
Preconditions:
644
|h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
645
646
Write p=2^255-19; q=floor(h/p).
647
Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
648
649
Proof:
650
Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
651
Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
652
653
Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
654
Then 0<y<1.
655
656
Write r=h-pq.
657
Have 0<=r<=p-1=2^255-20.
658
Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
659
660
Write x=r+19(2^-255)r+y.
661
Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
662
663
Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
664
so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
665
*/
666
667
void FE_25519::to_bytes(uint8_t s[32]) const
668
10
   {
669
10
   const int64_t X25 = (1 << 25);
670
671
10
   int32_t h0 = m_fe[0];
672
10
   int32_t h1 = m_fe[1];
673
10
   int32_t h2 = m_fe[2];
674
10
   int32_t h3 = m_fe[3];
675
10
   int32_t h4 = m_fe[4];
676
10
   int32_t h5 = m_fe[5];
677
10
   int32_t h6 = m_fe[6];
678
10
   int32_t h7 = m_fe[7];
679
10
   int32_t h8 = m_fe[8];
680
10
   int32_t h9 = m_fe[9];
681
10
   int32_t q;
682
683
10
   q = (19 * h9 + ((static_cast<int32_t>(1) << 24))) >> 25;
684
10
   q = (h0 + q) >> 26;
685
10
   q = (h1 + q) >> 25;
686
10
   q = (h2 + q) >> 26;
687
10
   q = (h3 + q) >> 25;
688
10
   q = (h4 + q) >> 26;
689
10
   q = (h5 + q) >> 25;
690
10
   q = (h6 + q) >> 26;
691
10
   q = (h7 + q) >> 25;
692
10
   q = (h8 + q) >> 26;
693
10
   q = (h9 + q) >> 25;
694
695
   /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
696
10
   h0 += 19 * q;
697
   /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
698
699
10
   carry0<26>(h0, h1);
700
10
   carry0<25>(h1, h2);
701
10
   carry0<26>(h2, h3);
702
10
   carry0<25>(h3, h4);
703
10
   carry0<26>(h4, h5);
704
10
   carry0<25>(h5, h6);
705
10
   carry0<26>(h6, h7);
706
10
   carry0<25>(h7, h8);
707
10
   carry0<26>(h8, h9);
708
709
10
   int32_t carry9 = h9 >> 25;
710
10
   h9 -= carry9 * X25;
711
   /* h10 = carry9 */
712
713
   /*
714
   Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
715
   Have h0+...+2^230 h9 between 0 and 2^255-1;
716
   evidently 2^255 h10-2^255 q = 0.
717
   Goal: Output h0+...+2^230 h9.
718
   */
719
720
10
   s[0] = static_cast<uint8_t>(h0 >> 0);
721
10
   s[1] = static_cast<uint8_t>(h0 >> 8);
722
10
   s[2] = static_cast<uint8_t>(h0 >> 16);
723
10
   s[3] = static_cast<uint8_t>((h0 >> 24) | (h1 << 2));
724
10
   s[4] = static_cast<uint8_t>(h1 >> 6);
725
10
   s[5] = static_cast<uint8_t>(h1 >> 14);
726
10
   s[6] = static_cast<uint8_t>((h1 >> 22) | (h2 << 3));
727
10
   s[7] = static_cast<uint8_t>(h2 >> 5);
728
10
   s[8] = static_cast<uint8_t>(h2 >> 13);
729
10
   s[9] = static_cast<uint8_t>((h2 >> 21) | (h3 << 5));
730
10
   s[10] = static_cast<uint8_t>(h3 >> 3);
731
10
   s[11] = static_cast<uint8_t>(h3 >> 11);
732
10
   s[12] = static_cast<uint8_t>((h3 >> 19) | (h4 << 6));
733
10
   s[13] = static_cast<uint8_t>(h4 >> 2);
734
10
   s[14] = static_cast<uint8_t>(h4 >> 10);
735
10
   s[15] = static_cast<uint8_t>(h4 >> 18);
736
10
   s[16] = static_cast<uint8_t>(h5 >> 0);
737
10
   s[17] = static_cast<uint8_t>(h5 >> 8);
738
10
   s[18] = static_cast<uint8_t>(h5 >> 16);
739
10
   s[19] = static_cast<uint8_t>((h5 >> 24) | (h6 << 1));
740
10
   s[20] = static_cast<uint8_t>(h6 >> 7);
741
10
   s[21] = static_cast<uint8_t>(h6 >> 15);
742
10
   s[22] = static_cast<uint8_t>((h6 >> 23) | (h7 << 3));
743
10
   s[23] = static_cast<uint8_t>(h7 >> 5);
744
10
   s[24] = static_cast<uint8_t>(h7 >> 13);
745
10
   s[25] = static_cast<uint8_t>((h7 >> 21) | (h8 << 4));
746
10
   s[26] = static_cast<uint8_t>(h8 >> 4);
747
10
   s[27] = static_cast<uint8_t>(h8 >> 12);
748
10
   s[28] = static_cast<uint8_t>((h8 >> 20) | (h9 << 6));
749
10
   s[29] = static_cast<uint8_t>(h9 >> 2);
750
10
   s[30] = static_cast<uint8_t>(h9 >> 10);
751
10
   s[31] = static_cast<uint8_t>(h9 >> 18);
752
10
   }
753
754
}