Coverage Report

Created: 2021-02-21 07:20

/src/botan/src/lib/pubkey/ed25519/ed25519_fe.cpp
Line
Count
Source
1
/*
2
* Ed25519 field element
3
* (C) 2017 Ribose Inc
4
*
5
* Based on the public domain code from SUPERCOP ref10 by
6
* Peter Schwabe, Daniel J. Bernstein, Niels Duif, Tanja Lange, Bo-Yin Yang
7
*
8
* Botan is released under the Simplified BSD License (see license.txt)
9
*/
10
11
#include <botan/internal/ed25519_fe.h>
12
#include <botan/internal/ed25519_internal.h>
13
14
namespace Botan {
15
16
//static
17
FE_25519 FE_25519::invert(const FE_25519& z)
18
89
   {
19
89
   fe t0;
20
89
   fe t1;
21
89
   fe t2;
22
89
   fe t3;
23
24
89
   fe_sq(t0, z);
25
89
   fe_sq_iter(t1, t0, 2);
26
89
   fe_mul(t1, z, t1);
27
89
   fe_mul(t0, t0, t1);
28
89
   fe_sq(t2, t0);
29
89
   fe_mul(t1, t1, t2);
30
89
   fe_sq_iter(t2, t1, 5);
31
89
   fe_mul(t1, t2, t1);
32
89
   fe_sq_iter(t2, t1, 10);
33
89
   fe_mul(t2, t2, t1);
34
89
   fe_sq_iter(t3, t2, 20);
35
89
   fe_mul(t2, t3, t2);
36
89
   fe_sq_iter(t2, t2, 10);
37
89
   fe_mul(t1, t2, t1);
38
89
   fe_sq_iter(t2, t1, 50);
39
89
   fe_mul(t2, t2, t1);
40
89
   fe_sq_iter(t3, t2, 100);
41
89
   fe_mul(t2, t3, t2);
42
89
   fe_sq_iter(t2, t2, 50);
43
89
   fe_mul(t1, t2, t1);
44
89
   fe_sq_iter(t1, t1, 5);
45
46
89
   fe_mul(t0, t1, t0);
47
89
   return t0;
48
89
   }
49
50
FE_25519 FE_25519::pow_22523(const fe& z)
51
89
   {
52
89
   fe t0;
53
89
   fe t1;
54
89
   fe t2;
55
56
89
   fe_sq(t0, z);
57
89
   fe_sq_iter(t1, t0, 2);
58
89
   fe_mul(t1, z, t1);
59
89
   fe_mul(t0, t0, t1);
60
89
   fe_sq(t0, t0);
61
89
   fe_mul(t0, t1, t0);
62
89
   fe_sq_iter(t1, t0, 5);
63
89
   fe_mul(t0, t1, t0);
64
89
   fe_sq_iter(t1, t0, 10);
65
89
   fe_mul(t1, t1, t0);
66
89
   fe_sq_iter(t2, t1, 20);
67
89
   fe_mul(t1, t2, t1);
68
89
   fe_sq_iter(t1, t1, 10);
69
89
   fe_mul(t0, t1, t0);
70
89
   fe_sq_iter(t1, t0, 50);
71
89
   fe_mul(t1, t1, t0);
72
89
   fe_sq_iter(t2, t1, 100);
73
89
   fe_mul(t1, t2, t1);
74
89
   fe_sq_iter(t1, t1, 50);
75
89
   fe_mul(t0, t1, t0);
76
89
   fe_sq_iter(t0, t0, 2);
77
78
89
   fe_mul(t0, t0, z);
79
89
   return t0;
80
89
   }
81
82
/*
83
h = f * g
84
Can overlap h with f or g.
85
86
Preconditions:
87
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
88
|g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
89
90
Postconditions:
91
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
92
*/
93
94
/*
95
Notes on implementation strategy:
96
97
Using schoolbook multiplication.
98
Karatsuba would save a little in some cost models.
99
100
Most multiplications by 2 and 19 are 32-bit precomputations;
101
cheaper than 64-bit postcomputations.
102
103
There is one remaining multiplication by 19 in the carry chain;
104
one *19 precomputation can be merged into this,
105
but the resulting data flow is considerably less clean.
106
107
There are 12 carries below.
108
10 of them are 2-way parallelizable and vectorizable.
109
Can get away with 11 carries, but then data flow is much deeper.
110
111
With tighter constraints on inputs can squeeze carries into int32.
112
*/
113
114
//static
115
FE_25519 FE_25519::mul(const FE_25519& f, const FE_25519& g)
116
116k
   {
117
116k
   const int32_t f0 = f[0];
118
116k
   const int32_t f1 = f[1];
119
116k
   const int32_t f2 = f[2];
120
116k
   const int32_t f3 = f[3];
121
116k
   const int32_t f4 = f[4];
122
116k
   const int32_t f5 = f[5];
123
116k
   const int32_t f6 = f[6];
124
116k
   const int32_t f7 = f[7];
125
116k
   const int32_t f8 = f[8];
126
116k
   const int32_t f9 = f[9];
127
128
116k
   const int32_t g0 = g[0];
129
116k
   const int32_t g1 = g[1];
130
116k
   const int32_t g2 = g[2];
131
116k
   const int32_t g3 = g[3];
132
116k
   const int32_t g4 = g[4];
133
116k
   const int32_t g5 = g[5];
134
116k
   const int32_t g6 = g[6];
135
116k
   const int32_t g7 = g[7];
136
116k
   const int32_t g8 = g[8];
137
116k
   const int32_t g9 = g[9];
138
139
116k
   const int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
140
116k
   const int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
141
116k
   const int32_t g3_19 = 19 * g3;
142
116k
   const int32_t g4_19 = 19 * g4;
143
116k
   const int32_t g5_19 = 19 * g5;
144
116k
   const int32_t g6_19 = 19 * g6;
145
116k
   const int32_t g7_19 = 19 * g7;
146
116k
   const int32_t g8_19 = 19 * g8;
147
116k
   const int32_t g9_19 = 19 * g9;
148
116k
   const int32_t f1_2 = 2 * f1;
149
116k
   const int32_t f3_2 = 2 * f3;
150
116k
   const int32_t f5_2 = 2 * f5;
151
116k
   const int32_t f7_2 = 2 * f7;
152
116k
   const int32_t f9_2 = 2 * f9;
153
154
116k
   const int64_t f0g0    = f0   * static_cast<int64_t>(g0);
155
116k
   const int64_t f0g1    = f0   * static_cast<int64_t>(g1);
156
116k
   const int64_t f0g2    = f0   * static_cast<int64_t>(g2);
157
116k
   const int64_t f0g3    = f0   * static_cast<int64_t>(g3);
158
116k
   const int64_t f0g4    = f0   * static_cast<int64_t>(g4);
159
116k
   const int64_t f0g5    = f0   * static_cast<int64_t>(g5);
160
116k
   const int64_t f0g6    = f0   * static_cast<int64_t>(g6);
161
116k
   const int64_t f0g7    = f0   * static_cast<int64_t>(g7);
162
116k
   const int64_t f0g8    = f0   * static_cast<int64_t>(g8);
163
116k
   const int64_t f0g9    = f0   * static_cast<int64_t>(g9);
164
116k
   const int64_t f1g0    = f1   * static_cast<int64_t>(g0);
165
116k
   const int64_t f1g1_2  = f1_2 * static_cast<int64_t>(g1);
166
116k
   const int64_t f1g2    = f1   * static_cast<int64_t>(g2);
167
116k
   const int64_t f1g3_2  = f1_2 * static_cast<int64_t>(g3);
168
116k
   const int64_t f1g4    = f1   * static_cast<int64_t>(g4);
169
116k
   const int64_t f1g5_2  = f1_2 * static_cast<int64_t>(g5);
170
116k
   const int64_t f1g6    = f1   * static_cast<int64_t>(g6);
171
116k
   const int64_t f1g7_2  = f1_2 * static_cast<int64_t>(g7);
172
116k
   const int64_t f1g8    = f1   * static_cast<int64_t>(g8);
173
116k
   const int64_t f1g9_38 = f1_2 * static_cast<int64_t>(g9_19);
174
116k
   const int64_t f2g0    = f2   * static_cast<int64_t>(g0);
175
116k
   const int64_t f2g1    = f2   * static_cast<int64_t>(g1);
176
116k
   const int64_t f2g2    = f2   * static_cast<int64_t>(g2);
177
116k
   const int64_t f2g3    = f2   * static_cast<int64_t>(g3);
178
116k
   const int64_t f2g4    = f2   * static_cast<int64_t>(g4);
179
116k
   const int64_t f2g5    = f2   * static_cast<int64_t>(g5);
180
116k
   const int64_t f2g6    = f2   * static_cast<int64_t>(g6);
181
116k
   const int64_t f2g7    = f2   * static_cast<int64_t>(g7);
182
116k
   const int64_t f2g8_19 = f2   * static_cast<int64_t>(g8_19);
183
116k
   const int64_t f2g9_19 = f2   * static_cast<int64_t>(g9_19);
184
116k
   const int64_t f3g0    = f3   * static_cast<int64_t>(g0);
185
116k
   const int64_t f3g1_2  = f3_2 * static_cast<int64_t>(g1);
186
116k
   const int64_t f3g2    = f3   * static_cast<int64_t>(g2);
187
116k
   const int64_t f3g3_2  = f3_2 * static_cast<int64_t>(g3);
188
116k
   const int64_t f3g4    = f3   * static_cast<int64_t>(g4);
189
116k
   const int64_t f3g5_2  = f3_2 * static_cast<int64_t>(g5);
190
116k
   const int64_t f3g6    = f3   * static_cast<int64_t>(g6);
191
116k
   const int64_t f3g7_38 = f3_2 * static_cast<int64_t>(g7_19);
192
116k
   const int64_t f3g8_19 = f3   * static_cast<int64_t>(g8_19);
193
116k
   const int64_t f3g9_38 = f3_2 * static_cast<int64_t>(g9_19);
194
116k
   const int64_t f4g0    = f4   * static_cast<int64_t>(g0);
195
116k
   const int64_t f4g1    = f4   * static_cast<int64_t>(g1);
196
116k
   const int64_t f4g2    = f4   * static_cast<int64_t>(g2);
197
116k
   const int64_t f4g3    = f4   * static_cast<int64_t>(g3);
198
116k
   const int64_t f4g4    = f4   * static_cast<int64_t>(g4);
199
116k
   const int64_t f4g5    = f4   * static_cast<int64_t>(g5);
200
116k
   const int64_t f4g6_19 = f4   * static_cast<int64_t>(g6_19);
201
116k
   const int64_t f4g7_19 = f4   * static_cast<int64_t>(g7_19);
202
116k
   const int64_t f4g8_19 = f4   * static_cast<int64_t>(g8_19);
203
116k
   const int64_t f4g9_19 = f4   * static_cast<int64_t>(g9_19);
204
116k
   const int64_t f5g0    = f5   * static_cast<int64_t>(g0);
205
116k
   const int64_t f5g1_2  = f5_2 * static_cast<int64_t>(g1);
206
116k
   const int64_t f5g2    = f5   * static_cast<int64_t>(g2);
207
116k
   const int64_t f5g3_2  = f5_2 * static_cast<int64_t>(g3);
208
116k
   const int64_t f5g4    = f5   * static_cast<int64_t>(g4);
209
116k
   const int64_t f5g5_38 = f5_2 * static_cast<int64_t>(g5_19);
210
116k
   const int64_t f5g6_19 = f5   * static_cast<int64_t>(g6_19);
211
116k
   const int64_t f5g7_38 = f5_2 * static_cast<int64_t>(g7_19);
212
116k
   const int64_t f5g8_19 = f5   * static_cast<int64_t>(g8_19);
213
116k
   const int64_t f5g9_38 = f5_2 * static_cast<int64_t>(g9_19);
214
116k
   const int64_t f6g0    = f6   * static_cast<int64_t>(g0);
215
116k
   const int64_t f6g1    = f6   * static_cast<int64_t>(g1);
216
116k
   const int64_t f6g2    = f6   * static_cast<int64_t>(g2);
217
116k
   const int64_t f6g3    = f6   * static_cast<int64_t>(g3);
218
116k
   const int64_t f6g4_19 = f6   * static_cast<int64_t>(g4_19);
219
116k
   const int64_t f6g5_19 = f6   * static_cast<int64_t>(g5_19);
220
116k
   const int64_t f6g6_19 = f6   * static_cast<int64_t>(g6_19);
221
116k
   const int64_t f6g7_19 = f6   * static_cast<int64_t>(g7_19);
222
116k
   const int64_t f6g8_19 = f6   * static_cast<int64_t>(g8_19);
223
116k
   const int64_t f6g9_19 = f6   * static_cast<int64_t>(g9_19);
224
116k
   const int64_t f7g0    = f7   * static_cast<int64_t>(g0);
225
116k
   const int64_t f7g1_2  = f7_2 * static_cast<int64_t>(g1);
226
116k
   const int64_t f7g2    = f7   * static_cast<int64_t>(g2);
227
116k
   const int64_t f7g3_38 = f7_2 * static_cast<int64_t>(g3_19);
228
116k
   const int64_t f7g4_19 = f7   * static_cast<int64_t>(g4_19);
229
116k
   const int64_t f7g5_38 = f7_2 * static_cast<int64_t>(g5_19);
230
116k
   const int64_t f7g6_19 = f7   * static_cast<int64_t>(g6_19);
231
116k
   const int64_t f7g7_38 = f7_2 * static_cast<int64_t>(g7_19);
232
116k
   const int64_t f7g8_19 = f7   * static_cast<int64_t>(g8_19);
233
116k
   const int64_t f7g9_38 = f7_2 * static_cast<int64_t>(g9_19);
234
116k
   const int64_t f8g0    = f8   * static_cast<int64_t>(g0);
235
116k
   const int64_t f8g1    = f8   * static_cast<int64_t>(g1);
236
116k
   const int64_t f8g2_19 = f8   * static_cast<int64_t>(g2_19);
237
116k
   const int64_t f8g3_19 = f8   * static_cast<int64_t>(g3_19);
238
116k
   const int64_t f8g4_19 = f8   * static_cast<int64_t>(g4_19);
239
116k
   const int64_t f8g5_19 = f8   * static_cast<int64_t>(g5_19);
240
116k
   const int64_t f8g6_19 = f8   * static_cast<int64_t>(g6_19);
241
116k
   const int64_t f8g7_19 = f8   * static_cast<int64_t>(g7_19);
242
116k
   const int64_t f8g8_19 = f8   * static_cast<int64_t>(g8_19);
243
116k
   const int64_t f8g9_19 = f8   * static_cast<int64_t>(g9_19);
244
116k
   const int64_t f9g0    = f9   * static_cast<int64_t>(g0);
245
116k
   const int64_t f9g1_38 = f9_2 * static_cast<int64_t>(g1_19);
246
116k
   const int64_t f9g2_19 = f9   * static_cast<int64_t>(g2_19);
247
116k
   const int64_t f9g3_38 = f9_2 * static_cast<int64_t>(g3_19);
248
116k
   const int64_t f9g4_19 = f9   * static_cast<int64_t>(g4_19);
249
116k
   const int64_t f9g5_38 = f9_2 * static_cast<int64_t>(g5_19);
250
116k
   const int64_t f9g6_19 = f9   * static_cast<int64_t>(g6_19);
251
116k
   const int64_t f9g7_38 = f9_2 * static_cast<int64_t>(g7_19);
252
116k
   const int64_t f9g8_19 = f9   * static_cast<int64_t>(g8_19);
253
116k
   const int64_t f9g9_38 = f9_2 * static_cast<int64_t>(g9_19);
254
255
116k
   int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
256
116k
   int64_t h1 = f0g1+f1g0   +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
257
116k
   int64_t h2 = f0g2+f1g1_2 +f2g0   +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
258
116k
   int64_t h3 = f0g3+f1g2   +f2g1   +f3g0   +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
259
116k
   int64_t h4 = f0g4+f1g3_2 +f2g2   +f3g1_2 +f4g0   +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
260
116k
   int64_t h5 = f0g5+f1g4   +f2g3   +f3g2   +f4g1   +f5g0   +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
261
116k
   int64_t h6 = f0g6+f1g5_2 +f2g4   +f3g3_2 +f4g2   +f5g1_2 +f6g0   +f7g9_38+f8g8_19+f9g7_38;
262
116k
   int64_t h7 = f0g7+f1g6   +f2g5   +f3g4   +f4g3   +f5g2   +f6g1   +f7g0   +f8g9_19+f9g8_19;
263
116k
   int64_t h8 = f0g8+f1g7_2 +f2g6   +f3g5_2 +f4g4   +f5g3_2 +f6g2   +f7g1_2 +f8g0   +f9g9_38;
264
116k
   int64_t h9 = f0g9+f1g8   +f2g7   +f3g6   +f4g5   +f5g4   +f6g3   +f7g2   +f8g1   +f9g0   ;
265
266
   /*
267
   |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
268
   i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
269
   |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
270
   i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
271
   */
272
116k
   carry<26>(h0, h1);
273
116k
   carry<26>(h4, h5);
274
275
   /* |h0| <= 2^25 */
276
   /* |h4| <= 2^25 */
277
   /* |h1| <= 1.71*2^59 */
278
   /* |h5| <= 1.71*2^59 */
279
280
116k
   carry<25>(h1, h2);
281
116k
   carry<25>(h5, h6);
282
283
   /* |h1| <= 2^24; from now on fits into int32 */
284
   /* |h5| <= 2^24; from now on fits into int32 */
285
   /* |h2| <= 1.41*2^60 */
286
   /* |h6| <= 1.41*2^60 */
287
288
116k
   carry<26>(h2, h3);
289
116k
   carry<26>(h6, h7);
290
   /* |h2| <= 2^25; from now on fits into int32 unchanged */
291
   /* |h6| <= 2^25; from now on fits into int32 unchanged */
292
   /* |h3| <= 1.71*2^59 */
293
   /* |h7| <= 1.71*2^59 */
294
295
116k
   carry<25>(h3, h4);
296
116k
   carry<25>(h7, h8);
297
   /* |h3| <= 2^24; from now on fits into int32 unchanged */
298
   /* |h7| <= 2^24; from now on fits into int32 unchanged */
299
   /* |h4| <= 1.72*2^34 */
300
   /* |h8| <= 1.41*2^60 */
301
302
116k
   carry<26>(h4, h5);
303
116k
   carry<26>(h8, h9);
304
   /* |h4| <= 2^25; from now on fits into int32 unchanged */
305
   /* |h8| <= 2^25; from now on fits into int32 unchanged */
306
   /* |h5| <= 1.01*2^24 */
307
   /* |h9| <= 1.71*2^59 */
308
309
116k
   carry<25, 19>(h9, h0);
310
311
   /* |h9| <= 2^24; from now on fits into int32 unchanged */
312
   /* |h0| <= 1.1*2^39 */
313
314
116k
   carry<26>(h0, h1);
315
   /* |h0| <= 2^25; from now on fits into int32 unchanged */
316
   /* |h1| <= 1.01*2^24 */
317
318
116k
   return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
319
116k
   }
320
321
/*
322
h = f * f
323
Can overlap h with f.
324
325
Preconditions:
326
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
327
328
Postconditions:
329
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
330
*/
331
332
/*
333
See fe_mul.c for discussion of implementation strategy.
334
*/
335
336
//static
337
FE_25519 FE_25519::sqr_iter(const FE_25519& f, size_t iter)
338
67.8k
   {
339
67.8k
   int32_t f0 = f[0];
340
67.8k
   int32_t f1 = f[1];
341
67.8k
   int32_t f2 = f[2];
342
67.8k
   int32_t f3 = f[3];
343
67.8k
   int32_t f4 = f[4];
344
67.8k
   int32_t f5 = f[5];
345
67.8k
   int32_t f6 = f[6];
346
67.8k
   int32_t f7 = f[7];
347
67.8k
   int32_t f8 = f[8];
348
67.8k
   int32_t f9 = f[9];
349
350
178k
   for(size_t i = 0; i != iter; ++i)
351
110k
      {
352
110k
      const int32_t f0_2 = 2 * f0;
353
110k
      const int32_t f1_2 = 2 * f1;
354
110k
      const int32_t f2_2 = 2 * f2;
355
110k
      const int32_t f3_2 = 2 * f3;
356
110k
      const int32_t f4_2 = 2 * f4;
357
110k
      const int32_t f5_2 = 2 * f5;
358
110k
      const int32_t f6_2 = 2 * f6;
359
110k
      const int32_t f7_2 = 2 * f7;
360
110k
      const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
361
110k
      const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
362
110k
      const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
363
110k
      const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
364
110k
      const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
365
366
110k
      const int64_t f0f0    = f0   * static_cast<int64_t>(f0);
367
110k
      const int64_t f0f1_2  = f0_2 * static_cast<int64_t>(f1);
368
110k
      const int64_t f0f2_2  = f0_2 * static_cast<int64_t>(f2);
369
110k
      const int64_t f0f3_2  = f0_2 * static_cast<int64_t>(f3);
370
110k
      const int64_t f0f4_2  = f0_2 * static_cast<int64_t>(f4);
371
110k
      const int64_t f0f5_2  = f0_2 * static_cast<int64_t>(f5);
372
110k
      const int64_t f0f6_2  = f0_2 * static_cast<int64_t>(f6);
373
110k
      const int64_t f0f7_2  = f0_2 * static_cast<int64_t>(f7);
374
110k
      const int64_t f0f8_2  = f0_2 * static_cast<int64_t>(f8);
375
110k
      const int64_t f0f9_2  = f0_2 * static_cast<int64_t>(f9);
376
110k
      const int64_t f1f1_2  = f1_2 * static_cast<int64_t>(f1);
377
110k
      const int64_t f1f2_2  = f1_2 * static_cast<int64_t>(f2);
378
110k
      const int64_t f1f3_4  = f1_2 * static_cast<int64_t>(f3_2);
379
110k
      const int64_t f1f4_2  = f1_2 * static_cast<int64_t>(f4);
380
110k
      const int64_t f1f5_4  = f1_2 * static_cast<int64_t>(f5_2);
381
110k
      const int64_t f1f6_2  = f1_2 * static_cast<int64_t>(f6);
382
110k
      const int64_t f1f7_4  = f1_2 * static_cast<int64_t>(f7_2);
383
110k
      const int64_t f1f8_2  = f1_2 * static_cast<int64_t>(f8);
384
110k
      const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
385
110k
      const int64_t f2f2    = f2   * static_cast<int64_t>(f2);
386
110k
      const int64_t f2f3_2  = f2_2 * static_cast<int64_t>(f3);
387
110k
      const int64_t f2f4_2  = f2_2 * static_cast<int64_t>(f4);
388
110k
      const int64_t f2f5_2  = f2_2 * static_cast<int64_t>(f5);
389
110k
      const int64_t f2f6_2  = f2_2 * static_cast<int64_t>(f6);
390
110k
      const int64_t f2f7_2  = f2_2 * static_cast<int64_t>(f7);
391
110k
      const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
392
110k
      const int64_t f2f9_38 = f2   * static_cast<int64_t>(f9_38);
393
110k
      const int64_t f3f3_2  = f3_2 * static_cast<int64_t>(f3);
394
110k
      const int64_t f3f4_2  = f3_2 * static_cast<int64_t>(f4);
395
110k
      const int64_t f3f5_4  = f3_2 * static_cast<int64_t>(f5_2);
396
110k
      const int64_t f3f6_2  = f3_2 * static_cast<int64_t>(f6);
397
110k
      const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
398
110k
      const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
399
110k
      const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
400
110k
      const int64_t f4f4    = f4   * static_cast<int64_t>(f4);
401
110k
      const int64_t f4f5_2  = f4_2 * static_cast<int64_t>(f5);
402
110k
      const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
403
110k
      const int64_t f4f7_38 = f4   * static_cast<int64_t>(f7_38);
404
110k
      const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
405
110k
      const int64_t f4f9_38 = f4   * static_cast<int64_t>(f9_38);
406
110k
      const int64_t f5f5_38 = f5   * static_cast<int64_t>(f5_38);
407
110k
      const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
408
110k
      const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
409
110k
      const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
410
110k
      const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
411
110k
      const int64_t f6f6_19 = f6   * static_cast<int64_t>(f6_19);
412
110k
      const int64_t f6f7_38 = f6   * static_cast<int64_t>(f7_38);
413
110k
      const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
414
110k
      const int64_t f6f9_38 = f6   * static_cast<int64_t>(f9_38);
415
110k
      const int64_t f7f7_38 = f7   * static_cast<int64_t>(f7_38);
416
110k
      const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
417
110k
      const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
418
110k
      const int64_t f8f8_19 = f8   * static_cast<int64_t>(f8_19);
419
110k
      const int64_t f8f9_38 = f8   * static_cast<int64_t>(f9_38);
420
110k
      const int64_t f9f9_38 = f9   * static_cast<int64_t>(f9_38);
421
422
110k
      int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
423
110k
      int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
424
110k
      int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
425
110k
      int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
426
110k
      int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
427
110k
      int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
428
110k
      int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
429
110k
      int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
430
110k
      int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
431
110k
      int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
432
433
110k
      carry<26>(h0, h1);
434
110k
      carry<26>(h4, h5);
435
110k
      carry<25>(h1, h2);
436
110k
      carry<25>(h5, h6);
437
110k
      carry<26>(h2, h3);
438
110k
      carry<26>(h6, h7);
439
440
110k
      carry<25>(h3, h4);
441
110k
      carry<25>(h7, h8);
442
443
110k
      carry<26>(h4, h5);
444
110k
      carry<26>(h8, h9);
445
110k
      carry<25,19>(h9, h0);
446
110k
      carry<26>(h0, h1);
447
448
110k
      f0 = static_cast<int32_t>(h0);
449
110k
      f1 = static_cast<int32_t>(h1);
450
110k
      f2 = static_cast<int32_t>(h2);
451
110k
      f3 = static_cast<int32_t>(h3);
452
110k
      f4 = static_cast<int32_t>(h4);
453
110k
      f5 = static_cast<int32_t>(h5);
454
110k
      f6 = static_cast<int32_t>(h6);
455
110k
      f7 = static_cast<int32_t>(h7);
456
110k
      f8 = static_cast<int32_t>(h8);
457
110k
      f9 = static_cast<int32_t>(h9);
458
110k
      }
459
460
67.8k
   return FE_25519(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9);
461
67.8k
   }
462
463
/*
464
h = 2 * f * f
465
Can overlap h with f.
466
467
Preconditions:
468
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
469
470
Postconditions:
471
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
472
*/
473
474
/*
475
See fe_mul.c for discussion of implementation strategy.
476
*/
477
478
//static
479
FE_25519 FE_25519::sqr2(const FE_25519& f)
480
21.8k
   {
481
21.8k
   const int32_t f0 = f[0];
482
21.8k
   const int32_t f1 = f[1];
483
21.8k
   const int32_t f2 = f[2];
484
21.8k
   const int32_t f3 = f[3];
485
21.8k
   const int32_t f4 = f[4];
486
21.8k
   const int32_t f5 = f[5];
487
21.8k
   const int32_t f6 = f[6];
488
21.8k
   const int32_t f7 = f[7];
489
21.8k
   const int32_t f8 = f[8];
490
21.8k
   const int32_t f9 = f[9];
491
21.8k
   const int32_t f0_2 = 2 * f0;
492
21.8k
   const int32_t f1_2 = 2 * f1;
493
21.8k
   const int32_t f2_2 = 2 * f2;
494
21.8k
   const int32_t f3_2 = 2 * f3;
495
21.8k
   const int32_t f4_2 = 2 * f4;
496
21.8k
   const int32_t f5_2 = 2 * f5;
497
21.8k
   const int32_t f6_2 = 2 * f6;
498
21.8k
   const int32_t f7_2 = 2 * f7;
499
21.8k
   const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
500
21.8k
   const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
501
21.8k
   const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
502
21.8k
   const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
503
21.8k
   const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
504
21.8k
   const int64_t f0f0    = f0   * static_cast<int64_t>(f0);
505
21.8k
   const int64_t f0f1_2  = f0_2 * static_cast<int64_t>(f1);
506
21.8k
   const int64_t f0f2_2  = f0_2 * static_cast<int64_t>(f2);
507
21.8k
   const int64_t f0f3_2  = f0_2 * static_cast<int64_t>(f3);
508
21.8k
   const int64_t f0f4_2  = f0_2 * static_cast<int64_t>(f4);
509
21.8k
   const int64_t f0f5_2  = f0_2 * static_cast<int64_t>(f5);
510
21.8k
   const int64_t f0f6_2  = f0_2 * static_cast<int64_t>(f6);
511
21.8k
   const int64_t f0f7_2  = f0_2 * static_cast<int64_t>(f7);
512
21.8k
   const int64_t f0f8_2  = f0_2 * static_cast<int64_t>(f8);
513
21.8k
   const int64_t f0f9_2  = f0_2 * static_cast<int64_t>(f9);
514
21.8k
   const int64_t f1f1_2  = f1_2 * static_cast<int64_t>(f1);
515
21.8k
   const int64_t f1f2_2  = f1_2 * static_cast<int64_t>(f2);
516
21.8k
   const int64_t f1f3_4  = f1_2 * static_cast<int64_t>(f3_2);
517
21.8k
   const int64_t f1f4_2  = f1_2 * static_cast<int64_t>(f4);
518
21.8k
   const int64_t f1f5_4  = f1_2 * static_cast<int64_t>(f5_2);
519
21.8k
   const int64_t f1f6_2  = f1_2 * static_cast<int64_t>(f6);
520
21.8k
   const int64_t f1f7_4  = f1_2 * static_cast<int64_t>(f7_2);
521
21.8k
   const int64_t f1f8_2  = f1_2 * static_cast<int64_t>(f8);
522
21.8k
   const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
523
21.8k
   const int64_t f2f2    = f2   * static_cast<int64_t>(f2);
524
21.8k
   const int64_t f2f3_2  = f2_2 * static_cast<int64_t>(f3);
525
21.8k
   const int64_t f2f4_2  = f2_2 * static_cast<int64_t>(f4);
526
21.8k
   const int64_t f2f5_2  = f2_2 * static_cast<int64_t>(f5);
527
21.8k
   const int64_t f2f6_2  = f2_2 * static_cast<int64_t>(f6);
528
21.8k
   const int64_t f2f7_2  = f2_2 * static_cast<int64_t>(f7);
529
21.8k
   const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
530
21.8k
   const int64_t f2f9_38 = f2   * static_cast<int64_t>(f9_38);
531
21.8k
   const int64_t f3f3_2  = f3_2 * static_cast<int64_t>(f3);
532
21.8k
   const int64_t f3f4_2  = f3_2 * static_cast<int64_t>(f4);
533
21.8k
   const int64_t f3f5_4  = f3_2 * static_cast<int64_t>(f5_2);
534
21.8k
   const int64_t f3f6_2  = f3_2 * static_cast<int64_t>(f6);
535
21.8k
   const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
536
21.8k
   const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
537
21.8k
   const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
538
21.8k
   const int64_t f4f4    = f4   * static_cast<int64_t>(f4);
539
21.8k
   const int64_t f4f5_2  = f4_2 * static_cast<int64_t>(f5);
540
21.8k
   const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
541
21.8k
   const int64_t f4f7_38 = f4   * static_cast<int64_t>(f7_38);
542
21.8k
   const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
543
21.8k
   const int64_t f4f9_38 = f4   * static_cast<int64_t>(f9_38);
544
21.8k
   const int64_t f5f5_38 = f5   * static_cast<int64_t>(f5_38);
545
21.8k
   const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
546
21.8k
   const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
547
21.8k
   const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
548
21.8k
   const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
549
21.8k
   const int64_t f6f6_19 = f6   * static_cast<int64_t>(f6_19);
550
21.8k
   const int64_t f6f7_38 = f6   * static_cast<int64_t>(f7_38);
551
21.8k
   const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
552
21.8k
   const int64_t f6f9_38 = f6   * static_cast<int64_t>(f9_38);
553
21.8k
   const int64_t f7f7_38 = f7   * static_cast<int64_t>(f7_38);
554
21.8k
   const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
555
21.8k
   const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
556
21.8k
   const int64_t f8f8_19 = f8   * static_cast<int64_t>(f8_19);
557
21.8k
   const int64_t f8f9_38 = f8   * static_cast<int64_t>(f9_38);
558
21.8k
   const int64_t f9f9_38 = f9   * static_cast<int64_t>(f9_38);
559
560
21.8k
   int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
561
21.8k
   int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
562
21.8k
   int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
563
21.8k
   int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
564
21.8k
   int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
565
21.8k
   int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
566
21.8k
   int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
567
21.8k
   int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
568
21.8k
   int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
569
21.8k
   int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
570
571
21.8k
   h0 += h0;
572
21.8k
   h1 += h1;
573
21.8k
   h2 += h2;
574
21.8k
   h3 += h3;
575
21.8k
   h4 += h4;
576
21.8k
   h5 += h5;
577
21.8k
   h6 += h6;
578
21.8k
   h7 += h7;
579
21.8k
   h8 += h8;
580
21.8k
   h9 += h9;
581
582
21.8k
   carry<26>(h0, h1);
583
21.8k
   carry<26>(h4, h5);
584
585
21.8k
   carry<25>(h1, h2);
586
21.8k
   carry<25>(h5, h6);
587
588
21.8k
   carry<26>(h2, h3);
589
21.8k
   carry<26>(h6, h7);
590
591
21.8k
   carry<25>(h3, h4);
592
21.8k
   carry<25>(h7, h8);
593
21.8k
   carry<26>(h4, h5);
594
21.8k
   carry<26>(h8, h9);
595
21.8k
   carry<25,19>(h9, h0);
596
21.8k
   carry<26>(h0, h1);
597
598
21.8k
   return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
599
21.8k
   }
600
601
/*
602
Ignores top bit of h.
603
*/
604
605
void FE_25519::from_bytes(const uint8_t s[32])
606
89
   {
607
89
   int64_t h0 = load_4(s);
608
89
   int64_t h1 = load_3(s + 4) << 6;
609
89
   int64_t h2 = load_3(s + 7) << 5;
610
89
   int64_t h3 = load_3(s + 10) << 3;
611
89
   int64_t h4 = load_3(s + 13) << 2;
612
89
   int64_t h5 = load_4(s + 16);
613
89
   int64_t h6 = load_3(s + 20) << 7;
614
89
   int64_t h7 = load_3(s + 23) << 5;
615
89
   int64_t h8 = load_3(s + 26) << 4;
616
89
   int64_t h9 = (load_3(s + 29) & 0x7fffff) << 2;
617
618
89
   carry<25,19>(h9, h0);
619
89
   carry<25>(h1, h2);
620
89
   carry<25>(h3, h4);
621
89
   carry<25>(h5, h6);
622
89
   carry<25>(h7, h8);
623
624
89
   carry<26>(h0, h1);
625
89
   carry<26>(h2, h3);
626
89
   carry<26>(h4, h5);
627
89
   carry<26>(h6, h7);
628
89
   carry<26>(h8, h9);
629
630
89
   m_fe[0] = static_cast<int32_t>(h0);
631
89
   m_fe[1] = static_cast<int32_t>(h1);
632
89
   m_fe[2] = static_cast<int32_t>(h2);
633
89
   m_fe[3] = static_cast<int32_t>(h3);
634
89
   m_fe[4] = static_cast<int32_t>(h4);
635
89
   m_fe[5] = static_cast<int32_t>(h5);
636
89
   m_fe[6] = static_cast<int32_t>(h6);
637
89
   m_fe[7] = static_cast<int32_t>(h7);
638
89
   m_fe[8] = static_cast<int32_t>(h8);
639
89
   m_fe[9] = static_cast<int32_t>(h9);
640
89
   }
641
642
/*
643
Preconditions:
644
|h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
645
646
Write p=2^255-19; q=floor(h/p).
647
Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
648
649
Proof:
650
Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
651
Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
652
653
Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
654
Then 0<y<1.
655
656
Write r=h-pq.
657
Have 0<=r<=p-1=2^255-20.
658
Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
659
660
Write x=r+19(2^-255)r+y.
661
Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
662
663
Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
664
so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
665
*/
666
667
void FE_25519::to_bytes(uint8_t s[32]) const
668
399
   {
669
399
   const int64_t X25 = (1 << 25);
670
671
399
   int32_t h0 = m_fe[0];
672
399
   int32_t h1 = m_fe[1];
673
399
   int32_t h2 = m_fe[2];
674
399
   int32_t h3 = m_fe[3];
675
399
   int32_t h4 = m_fe[4];
676
399
   int32_t h5 = m_fe[5];
677
399
   int32_t h6 = m_fe[6];
678
399
   int32_t h7 = m_fe[7];
679
399
   int32_t h8 = m_fe[8];
680
399
   int32_t h9 = m_fe[9];
681
399
   int32_t q;
682
683
399
   q = (19 * h9 + ((static_cast<int32_t>(1) << 24))) >> 25;
684
399
   q = (h0 + q) >> 26;
685
399
   q = (h1 + q) >> 25;
686
399
   q = (h2 + q) >> 26;
687
399
   q = (h3 + q) >> 25;
688
399
   q = (h4 + q) >> 26;
689
399
   q = (h5 + q) >> 25;
690
399
   q = (h6 + q) >> 26;
691
399
   q = (h7 + q) >> 25;
692
399
   q = (h8 + q) >> 26;
693
399
   q = (h9 + q) >> 25;
694
695
   /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
696
399
   h0 += 19 * q;
697
   /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
698
699
399
   carry0<26>(h0, h1);
700
399
   carry0<25>(h1, h2);
701
399
   carry0<26>(h2, h3);
702
399
   carry0<25>(h3, h4);
703
399
   carry0<26>(h4, h5);
704
399
   carry0<25>(h5, h6);
705
399
   carry0<26>(h6, h7);
706
399
   carry0<25>(h7, h8);
707
399
   carry0<26>(h8, h9);
708
709
399
   int32_t carry9 = h9 >> 25;
710
399
   h9 -= carry9 * X25;
711
   /* h10 = carry9 */
712
713
   /*
714
   Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
715
   Have h0+...+2^230 h9 between 0 and 2^255-1;
716
   evidently 2^255 h10-2^255 q = 0.
717
   Goal: Output h0+...+2^230 h9.
718
   */
719
720
399
   s[0] = static_cast<uint8_t>(h0 >> 0);
721
399
   s[1] = static_cast<uint8_t>(h0 >> 8);
722
399
   s[2] = static_cast<uint8_t>(h0 >> 16);
723
399
   s[3] = static_cast<uint8_t>((h0 >> 24) | (h1 << 2));
724
399
   s[4] = static_cast<uint8_t>(h1 >> 6);
725
399
   s[5] = static_cast<uint8_t>(h1 >> 14);
726
399
   s[6] = static_cast<uint8_t>((h1 >> 22) | (h2 << 3));
727
399
   s[7] = static_cast<uint8_t>(h2 >> 5);
728
399
   s[8] = static_cast<uint8_t>(h2 >> 13);
729
399
   s[9] = static_cast<uint8_t>((h2 >> 21) | (h3 << 5));
730
399
   s[10] = static_cast<uint8_t>(h3 >> 3);
731
399
   s[11] = static_cast<uint8_t>(h3 >> 11);
732
399
   s[12] = static_cast<uint8_t>((h3 >> 19) | (h4 << 6));
733
399
   s[13] = static_cast<uint8_t>(h4 >> 2);
734
399
   s[14] = static_cast<uint8_t>(h4 >> 10);
735
399
   s[15] = static_cast<uint8_t>(h4 >> 18);
736
399
   s[16] = static_cast<uint8_t>(h5 >> 0);
737
399
   s[17] = static_cast<uint8_t>(h5 >> 8);
738
399
   s[18] = static_cast<uint8_t>(h5 >> 16);
739
399
   s[19] = static_cast<uint8_t>((h5 >> 24) | (h6 << 1));
740
399
   s[20] = static_cast<uint8_t>(h6 >> 7);
741
399
   s[21] = static_cast<uint8_t>(h6 >> 15);
742
399
   s[22] = static_cast<uint8_t>((h6 >> 23) | (h7 << 3));
743
399
   s[23] = static_cast<uint8_t>(h7 >> 5);
744
399
   s[24] = static_cast<uint8_t>(h7 >> 13);
745
399
   s[25] = static_cast<uint8_t>((h7 >> 21) | (h8 << 4));
746
399
   s[26] = static_cast<uint8_t>(h8 >> 4);
747
399
   s[27] = static_cast<uint8_t>(h8 >> 12);
748
399
   s[28] = static_cast<uint8_t>((h8 >> 20) | (h9 << 6));
749
399
   s[29] = static_cast<uint8_t>(h9 >> 2);
750
399
   s[30] = static_cast<uint8_t>(h9 >> 10);
751
399
   s[31] = static_cast<uint8_t>(h9 >> 18);
752
399
   }
753
754
}